Netdev List
 help / color / mirror / Atom feed
* [net-next v2 01/15] i40e: convert queue stats to i40e_stats array
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Use an i40e_stats array to handle the queue stats, instead of coding
similar functionality separately. Because of how the queue stats are
accessed on some kernels, we can't easily use i40e_add_ethtool_stats.

Instead, implement a separate helper, i40e_add_queue_stats, which we'll
use instead. This helper will correctly implement the
u64_stats_fetch_begin_irq logic and allow retries until successful. We
share the most complex code by re-using i40e_add_one_ethtool_stat.

This logic additionally easily supports skipping disabled rings by using
a ternary operator before calling the u64_stats_fetch_begin_irq()
function, so that we correctly zero-out the stats values without having
to perform two separate sections of code.

This significantly reduces the boiler plate code in
i40e_get_ethtool_stats, and helps keep the complex logic contained to as
few functions as possible.

With this patch, we've finally converted all the statistics to use the
helpers and the i40e_stats function.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 148 +++++++++++-------
 1 file changed, 89 insertions(+), 59 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 5ff6caa83948..235012b3bd42 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -33,6 +33,8 @@ struct i40e_stats {
 	I40E_STAT(struct i40e_veb, _name, _stat)
 #define I40E_PFC_STAT(_name, _stat) \
 	I40E_STAT(struct i40e_pfc_stats, _name, _stat)
+#define I40E_QUEUE_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_ring, _name, _stat)
 
 static const struct i40e_stats i40e_gstrings_net_stats[] = {
 	I40E_NETDEV_STAT(rx_packets),
@@ -171,20 +173,16 @@ static const struct i40e_stats i40e_gstrings_pfc_stats[] = {
 	I40E_PFC_STAT("port.rx_priority_%u_xon_2_xoff", priority_xon_2_xoff),
 };
 
-/* We use num_tx_queues here as a proxy for the maximum number of queues
- * available because we always allocate queues symmetrically.
- */
-#define I40E_MAX_NUM_QUEUES(n) ((n)->num_tx_queues)
-#define I40E_QUEUE_STATS_LEN(n)                                              \
-	   (I40E_MAX_NUM_QUEUES(n)                                           \
-	    * 2 /* Tx and Rx together */                                     \
-	    * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
-#define I40E_GLOBAL_STATS_LEN	ARRAY_SIZE(i40e_gstrings_stats)
+static const struct i40e_stats i40e_gstrings_queue_stats[] = {
+	I40E_QUEUE_STAT("%s-%u.packets", stats.packets),
+	I40E_QUEUE_STAT("%s-%u.bytes", stats.bytes),
+};
+
 #define I40E_NETDEV_STATS_LEN	ARRAY_SIZE(i40e_gstrings_net_stats)
+
 #define I40E_MISC_STATS_LEN	ARRAY_SIZE(i40e_gstrings_misc_stats)
-#define I40E_VSI_STATS_LEN(n)	(I40E_NETDEV_STATS_LEN + \
-				 I40E_MISC_STATS_LEN + \
-				 I40E_QUEUE_STATS_LEN((n)))
+
+#define I40E_VSI_STATS_LEN	(I40E_NETDEV_STATS_LEN + I40E_MISC_STATS_LEN)
 
 #define I40E_PFC_STATS_LEN	(ARRAY_SIZE(i40e_gstrings_pfc_stats) * \
 				 I40E_MAX_USER_PRIORITY)
@@ -193,10 +191,15 @@ static const struct i40e_stats i40e_gstrings_pfc_stats[] = {
 				 (ARRAY_SIZE(i40e_gstrings_veb_tc_stats) * \
 				  I40E_MAX_TRAFFIC_CLASS))
 
-#define I40E_PF_STATS_LEN(n)	(I40E_GLOBAL_STATS_LEN + \
+#define I40E_GLOBAL_STATS_LEN	ARRAY_SIZE(i40e_gstrings_stats)
+
+#define I40E_PF_STATS_LEN	(I40E_GLOBAL_STATS_LEN + \
 				 I40E_PFC_STATS_LEN + \
 				 I40E_VEB_STATS_LEN + \
-				 I40E_VSI_STATS_LEN((n)))
+				 I40E_VSI_STATS_LEN)
+
+/* Length of stats for a single queue */
+#define I40E_QUEUE_STATS_LEN	ARRAY_SIZE(i40e_gstrings_queue_stats)
 
 enum i40e_ethtool_test_id {
 	I40E_ETH_TEST_REG = 0,
@@ -1701,11 +1704,30 @@ static int i40e_get_stats_count(struct net_device *netdev)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
+	int stats_len;
 
 	if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1)
-		return I40E_PF_STATS_LEN(netdev);
+		stats_len = I40E_PF_STATS_LEN;
 	else
-		return I40E_VSI_STATS_LEN(netdev);
+		stats_len = I40E_VSI_STATS_LEN;
+
+	/* The number of stats reported for a given net_device must remain
+	 * constant throughout the life of that device.
+	 *
+	 * This is because the API for obtaining the size, strings, and stats
+	 * is spread out over three separate ethtool ioctls. There is no safe
+	 * way to lock the number of stats across these calls, so we must
+	 * assume that they will never change.
+	 *
+	 * Due to this, we report the maximum number of queues, even if not
+	 * every queue is currently configured. Since we always allocate
+	 * queues in pairs, we'll just use netdev->num_tx_queues * 2. This
+	 * works because the num_tx_queues is set at device creation and never
+	 * changes.
+	 */
+	stats_len += I40E_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues;
+
+	return stats_len;
 }
 
 static int i40e_get_sset_count(struct net_device *netdev, int sset)
@@ -1734,8 +1756,8 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset)
  * @stat: the stat definition
  *
  * Copies the stat data defined by the pointer and stat structure pair into
- * the memory supplied as data. Used to implement i40e_add_ethtool_stats.
- * If the pointer is null, data will be zero'd.
+ * the memory supplied as data. Used to implement i40e_add_ethtool_stats and
+ * i40e_add_queue_stats. If the pointer is null, data will be zero'd.
  */
 static inline void
 i40e_add_one_ethtool_stat(u64 *data, void *pointer,
@@ -1810,6 +1832,45 @@ __i40e_add_ethtool_stats(u64 **data, void *pointer,
 #define i40e_add_ethtool_stats(data, pointer, stats) \
 	__i40e_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
 
+/**
+ * i40e_add_queue_stats - copy queue statistics into supplied buffer
+ * @data: ethtool stats buffer
+ * @ring: the ring to copy
+ *
+ * Queue statistics must be copied while protected by
+ * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats.
+ * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the
+ * ring pointer is null, zero out the queue stat values and update the data
+ * pointer. Otherwise safely copy the stats from the ring into the supplied
+ * buffer and update the data pointer when finished.
+ *
+ * This function expects to be called while under rcu_read_lock().
+ **/
+static inline void
+i40e_add_queue_stats(u64 **data, struct i40e_ring *ring)
+{
+	const unsigned int size = ARRAY_SIZE(i40e_gstrings_queue_stats);
+	const struct i40e_stats *stats = i40e_gstrings_queue_stats;
+	unsigned int start;
+	unsigned int i;
+
+	/* To avoid invalid statistics values, ensure that we keep retrying
+	 * the copy until we get a consistent value according to
+	 * u64_stats_fetch_retry_irq. But first, make sure our ring is
+	 * non-null before attempting to access its syncp.
+	 */
+	do {
+		start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp);
+		for (i = 0; i < size; i++) {
+			i40e_add_one_ethtool_stat(&(*data)[i], ring,
+						  &stats[i]);
+		}
+	} while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start));
+
+	/* Once we successfully copy the stats in, update the data pointer */
+	data += size;
+}
+
 /**
  * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure
  * @pf: the PF device structure
@@ -1853,12 +1914,10 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 				   struct ethtool_stats *stats, u64 *data)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_ring *tx_ring, *rx_ring;
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
 	struct i40e_veb *veb = pf->veb[pf->lan_veb];
 	unsigned int i;
-	unsigned int start;
 	bool veb_stats;
 	u64 *p = data;
 
@@ -1870,38 +1929,12 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 	i40e_add_ethtool_stats(&data, vsi, i40e_gstrings_misc_stats);
 
 	rcu_read_lock();
-	for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev) ; i++) {
-		tx_ring = READ_ONCE(vsi->tx_rings[i]);
-
-		if (!tx_ring) {
-			/* Bump the stat counter to skip these stats, and make
-			 * sure the memory is zero'd
-			 */
-			*(data++) = 0;
-			*(data++) = 0;
-			*(data++) = 0;
-			*(data++) = 0;
-			continue;
-		}
-
-		/* process Tx ring statistics */
-		do {
-			start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
-			data[0] = tx_ring->stats.packets;
-			data[1] = tx_ring->stats.bytes;
-		} while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
-		data += 2;
-
-		/* Rx ring is the 2nd half of the queue pair */
-		rx_ring = &tx_ring[1];
-		do {
-			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
-			data[0] = rx_ring->stats.packets;
-			data[1] = rx_ring->stats.bytes;
-		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
-		data += 2;
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		i40e_add_queue_stats(&data, READ_ONCE(vsi->tx_rings[i]));
+		i40e_add_queue_stats(&data, READ_ONCE(vsi->rx_rings[i]));
 	}
 	rcu_read_unlock();
+
 	if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
 		goto check_data_pointer;
 
@@ -1990,16 +2023,13 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
 
 	i40e_add_stat_strings(&data, i40e_gstrings_misc_stats);
 
-	for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev); i++) {
-		snprintf(data, ETH_GSTRING_LEN, "tx-%u.tx_packets", i);
-		data += ETH_GSTRING_LEN;
-		snprintf(data, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
-		data += ETH_GSTRING_LEN;
-		snprintf(data, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
-		data += ETH_GSTRING_LEN;
-		snprintf(data, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
-		data += ETH_GSTRING_LEN;
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "tx", i);
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "rx", i);
 	}
+
 	if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
 		return;
 
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 06/15] i40e: use correct length for strncpy
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Mitch Williams, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Mitch Williams <mitch.a.williams@intel.com>

Caught by GCC 8. When we provide a length for strncpy, we should not
include the terminating null. So we must tell it one less than the size
of the destination buffer.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 35f2866b38c6..1199f0502d6d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -694,7 +694,8 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf)
 	if (!IS_ERR_OR_NULL(pf->ptp_clock))
 		return 0;
 
-	strncpy(pf->ptp_caps.name, i40e_driver_name, sizeof(pf->ptp_caps.name));
+	strncpy(pf->ptp_caps.name, i40e_driver_name,
+		sizeof(pf->ptp_caps.name) - 1);
 	pf->ptp_caps.owner = THIS_MODULE;
 	pf->ptp_caps.max_adj = 999999999;
 	pf->ptp_caps.n_ext_ts = 0;
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 05/15] i40evf: Validate the number of queues a PF sends
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem
  Cc: Paul M Stillwell Jr, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>

A PF can send any number of queues to the VF and the VF may not
be able to support that many. Check to see that the number of
queues is less than or equal to the max number of queues the
VF can have.

Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../ethernet/intel/i40evf/i40evf_virtchnl.c   | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 79b7be83b5c3..6579dabab78c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -153,6 +153,32 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
 					  NULL, 0);
 }
 
+/**
+ * i40evf_validate_num_queues
+ * @adapter: adapter structure
+ *
+ * Validate that the number of queues the PF has sent in
+ * VIRTCHNL_OP_GET_VF_RESOURCES is not larger than the VF can handle.
+ **/
+static void i40evf_validate_num_queues(struct i40evf_adapter *adapter)
+{
+	if (adapter->vf_res->num_queue_pairs > I40EVF_MAX_REQ_QUEUES) {
+		struct virtchnl_vsi_resource *vsi_res;
+		int i;
+
+		dev_info(&adapter->pdev->dev, "Received %d queues, but can only have a max of %d\n",
+			 adapter->vf_res->num_queue_pairs,
+			 I40EVF_MAX_REQ_QUEUES);
+		dev_info(&adapter->pdev->dev, "Fixing by reducing queues to %d\n",
+			 I40EVF_MAX_REQ_QUEUES);
+		adapter->vf_res->num_queue_pairs = I40EVF_MAX_REQ_QUEUES;
+		for (i = 0; i < adapter->vf_res->num_vsis; i++) {
+			vsi_res = &adapter->vf_res->vsi_res[i];
+			vsi_res->num_queue_pairs = I40EVF_MAX_REQ_QUEUES;
+		}
+	}
+}
+
 /**
  * i40evf_get_vf_config
  * @adapter: private adapter structure
@@ -195,6 +221,11 @@ int i40evf_get_vf_config(struct i40evf_adapter *adapter)
 	err = (i40e_status)le32_to_cpu(event.desc.cookie_low);
 	memcpy(adapter->vf_res, event.msg_buf, min(event.msg_len, len));
 
+	/* some PFs send more queues than we should have so validate that
+	 * we aren't getting too many queues
+	 */
+	if (!err)
+		i40evf_validate_num_queues(adapter);
 	i40e_vf_parse_hw_config(hw, adapter->vf_res);
 out_alloc:
 	kfree(event.msg_buf);
@@ -1329,6 +1360,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 			  I40E_MAX_VF_VSI *
 			  sizeof(struct virtchnl_vsi_resource);
 		memcpy(adapter->vf_res, msg, min(msglen, len));
+		i40evf_validate_num_queues(adapter);
 		i40e_vf_parse_hw_config(&adapter->hw, adapter->vf_res);
 		if (is_zero_ether_addr(adapter->hw.mac.addr)) {
 			/* restore current mac address */
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 09/15] i40e: static analysis report from community
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Martyna Szapar, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Martyna Szapar <martyna.szapar@intel.com>

Static analysis tools report a problem from original driver submission.
Removing unnecessary check in condition.

Signed-off-by: Martyna Szapar <martyna.szapar@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ac685ad4d877..62f2b5bce6bb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -13033,7 +13033,7 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
 	for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++)
 		if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid)
 			break;
-	if (vsi_idx >= pf->num_alloc_vsi && vsi_seid != 0) {
+	if (vsi_idx == pf->num_alloc_vsi && vsi_seid != 0) {
 		dev_info(&pf->pdev->dev, "vsi seid %d not found\n",
 			 vsi_seid);
 		return NULL;
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 08/15] virtchnl: use u8 type for a field in the virtchnl_filter struct
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem
  Cc: Harshitha Ramamurthy, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>

The virtchnl_filter struct has a field called field_flags. A previous
commit mistakenly had the type to be a __u8. What we want is for the
field to be an unsigned 8 bit value, so let's just use the existing
kernel type u8 for that.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 212b3822d180..b41f7bc958ef 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -573,7 +573,7 @@ struct virtchnl_filter {
 	enum	virtchnl_flow_type flow_type;
 	enum	virtchnl_action action;
 	u32	action_meta;
-	__u8	field_flags;
+	u8	field_flags;
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 03/15] i40evf: update ethtool stats code and use helper functions
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Fix a bug in the way we handled VF queues, by always showing stats for
the maximum number of queues, even if they aren't allocated. It is not
safe to change the number of strings reported to ethtool, as grabbing
statistics occurs over multiple ethtool ops for which the rtnl_lock()
cannot be held the entire time.

Avoid this by always reporting queue stats for the maximum number of
queues in the netdevice. Share some of the helper functionality for
adding stats with the PF code in i40e_ethtool_stats.h

This should reduce the chance of potential future bugs, and make adding
new statistics easier.

Note for the queue stats, unlike the PF driver we do not keep an array
of queue pointers, but an array of queues, so care must be taken to
avoid accessing queue memory that hasn't yet been allocated.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../intel/i40evf/i40e_ethtool_stats.h         | 221 ++++++++++++++++++
 .../ethernet/intel/i40evf/i40evf_ethtool.c    | 137 ++++++-----
 2 files changed, 298 insertions(+), 60 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/i40evf/i40e_ethtool_stats.h

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_ethtool_stats.h b/drivers/net/ethernet/intel/i40evf/i40e_ethtool_stats.h
new file mode 100644
index 000000000000..60b595dd8c39
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40evf/i40e_ethtool_stats.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+/* ethtool statistics helpers */
+
+/**
+ * struct i40e_stats - definition for an ethtool statistic
+ * @stat_string: statistic name to display in ethtool -S output
+ * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64)
+ * @stat_offset: offsetof() the stat from a base pointer
+ *
+ * This structure defines a statistic to be added to the ethtool stats buffer.
+ * It defines a statistic as offset from a common base pointer. Stats should
+ * be defined in constant arrays using the I40E_STAT macro, with every element
+ * of the array using the same _type for calculating the sizeof_stat and
+ * stat_offset.
+ *
+ * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or
+ * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from
+ * the i40e_add_ethtool_stat() helper function.
+ *
+ * The @stat_string is interpreted as a format string, allowing formatted
+ * values to be inserted while looping over multiple structures for a given
+ * statistics array. Thus, every statistic string in an array should have the
+ * same type and number of format specifiers, to be formatted by variadic
+ * arguments to the i40e_add_stat_string() helper function.
+ **/
+struct i40e_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+/* Helper macro to define an i40e_stat structure with proper size and type.
+ * Use this when defining constant statistics arrays. Note that @_type expects
+ * only a type name and is used multiple times.
+ */
+#define I40E_STAT(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+/* Helper macro for defining some statistics directly copied from the netdev
+ * stats structure.
+ */
+#define I40E_NETDEV_STAT(_net_stat) \
+	I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat)
+
+/* Helper macro for defining some statistics related to queues */
+#define I40E_QUEUE_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_ring, _name, _stat)
+
+/* Stats associated with a Tx or Rx ring */
+static const struct i40e_stats i40e_gstrings_queue_stats[] = {
+	I40E_QUEUE_STAT("%s-%u.packets", stats.packets),
+	I40E_QUEUE_STAT("%s-%u.bytes", stats.bytes),
+};
+
+/**
+ * i40evf_add_one_ethtool_stat - copy the stat into the supplied buffer
+ * @data: location to store the stat value
+ * @pointer: basis for where to copy from
+ * @stat: the stat definition
+ *
+ * Copies the stat data defined by the pointer and stat structure pair into
+ * the memory supplied as data. Used to implement i40e_add_ethtool_stats and
+ * i40evf_add_queue_stats. If the pointer is null, data will be zero'd.
+ */
+static inline void
+i40evf_add_one_ethtool_stat(u64 *data, void *pointer,
+			    const struct i40e_stats *stat)
+{
+	char *p;
+
+	if (!pointer) {
+		/* ensure that the ethtool data buffer is zero'd for any stats
+		 * which don't have a valid pointer.
+		 */
+		*data = 0;
+		return;
+	}
+
+	p = (char *)pointer + stat->stat_offset;
+	switch (stat->sizeof_stat) {
+	case sizeof(u64):
+		*data = *((u64 *)p);
+		break;
+	case sizeof(u32):
+		*data = *((u32 *)p);
+		break;
+	case sizeof(u16):
+		*data = *((u16 *)p);
+		break;
+	case sizeof(u8):
+		*data = *((u8 *)p);
+		break;
+	default:
+		WARN_ONCE(1, "unexpected stat size for %s",
+			  stat->stat_string);
+		*data = 0;
+	}
+}
+
+/**
+ * __i40evf_add_ethtool_stats - copy stats into the ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location to copy stats from
+ * @stats: array of stats to copy
+ * @size: the size of the stats definition
+ *
+ * Copy the stats defined by the stats array using the pointer as a base into
+ * the data buffer supplied by ethtool. Updates the data pointer to point to
+ * the next empty location for successive calls to __i40evf_add_ethtool_stats.
+ * If pointer is null, set the data values to zero and update the pointer to
+ * skip these stats.
+ **/
+static inline void
+__i40evf_add_ethtool_stats(u64 **data, void *pointer,
+			   const struct i40e_stats stats[],
+			   const unsigned int size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		i40evf_add_one_ethtool_stat((*data)++, pointer, &stats[i]);
+}
+
+/**
+ * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location where stats are stored
+ * @stats: static const array of stat definitions
+ *
+ * Macro to ease the use of __i40evf_add_ethtool_stats by taking a static
+ * constant stats array and passing the ARRAY_SIZE(). This avoids typos by
+ * ensuring that we pass the size associated with the given stats array.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided.
+ **/
+#define i40e_add_ethtool_stats(data, pointer, stats) \
+	__i40evf_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
+
+/**
+ * i40evf_add_queue_stats - copy queue statistics into supplied buffer
+ * @data: ethtool stats buffer
+ * @ring: the ring to copy
+ *
+ * Queue statistics must be copied while protected by
+ * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats.
+ * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the
+ * ring pointer is null, zero out the queue stat values and update the data
+ * pointer. Otherwise safely copy the stats from the ring into the supplied
+ * buffer and update the data pointer when finished.
+ *
+ * This function expects to be called while under rcu_read_lock().
+ **/
+static inline void
+i40evf_add_queue_stats(u64 **data, struct i40e_ring *ring)
+{
+	const unsigned int size = ARRAY_SIZE(i40e_gstrings_queue_stats);
+	const struct i40e_stats *stats = i40e_gstrings_queue_stats;
+	unsigned int start;
+	unsigned int i;
+
+	/* To avoid invalid statistics values, ensure that we keep retrying
+	 * the copy until we get a consistent value according to
+	 * u64_stats_fetch_retry_irq. But first, make sure our ring is
+	 * non-null before attempting to access its syncp.
+	 */
+	do {
+		start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp);
+		for (i = 0; i < size; i++) {
+			i40evf_add_one_ethtool_stat(&(*data)[i], ring,
+						    &stats[i]);
+		}
+	} while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start));
+
+	/* Once we successfully copy the stats in, update the data pointer */
+	*data += size;
+}
+
+/**
+ * __i40e_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @size: size of the stats array
+ *
+ * Format and copy the strings described by stats into the buffer pointed at
+ * by p.
+ **/
+static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[],
+				    const unsigned int size, ...)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++) {
+		va_list args;
+
+		va_start(args, size);
+		vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
+		*p += ETH_GSTRING_LEN;
+		va_end(args);
+	}
+}
+
+/**
+ * 40e_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ *
+ * Format and copy the strings described by the const static stats value into
+ * the buffer pointed at by p.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided. Additionally, stats must be an array such that
+ * ARRAY_SIZE can be called on it.
+ **/
+#define i40e_add_stat_strings(p, stats, ...) \
+	__i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 69efe0aec76a..9319971c5c92 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -6,18 +6,12 @@
 
 #include <linux/uaccess.h>
 
-struct i40evf_stats {
-	char stat_string[ETH_GSTRING_LEN];
-	int stat_offset;
-};
+#include "i40e_ethtool_stats.h"
 
-#define I40EVF_STAT(_name, _stat) { \
-	.stat_string = _name, \
-	.stat_offset = offsetof(struct i40evf_adapter, _stat) \
-}
+#define I40EVF_STAT(_name, _stat) \
+	I40E_STAT(struct i40evf_adapter, _name, _stat)
 
-/* All stats are u64, so we don't need to track the size of the field. */
-static const struct i40evf_stats i40evf_gstrings_stats[] = {
+static const struct i40e_stats i40evf_gstrings_stats[] = {
 	I40EVF_STAT("rx_bytes", current_stats.rx_bytes),
 	I40EVF_STAT("rx_unicast", current_stats.rx_unicast),
 	I40EVF_STAT("rx_multicast", current_stats.rx_multicast),
@@ -32,13 +26,9 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = {
 	I40EVF_STAT("tx_errors", current_stats.tx_errors),
 };
 
-#define I40EVF_GLOBAL_STATS_LEN ARRAY_SIZE(i40evf_gstrings_stats)
-#define I40EVF_QUEUE_STATS_LEN(_dev) \
-	(((struct i40evf_adapter *)\
-		netdev_priv(_dev))->num_active_queues \
-		  * 2 * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
-#define I40EVF_STATS_LEN(_dev) \
-	(I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN(_dev))
+#define I40EVF_STATS_LEN	ARRAY_SIZE(i40evf_gstrings_stats)
+
+#define I40EVF_QUEUE_STATS_LEN	ARRAY_SIZE(i40e_gstrings_queue_stats)
 
 /* For now we have one and only one private flag and it is only defined
  * when we have support for the SKIP_CPU_SYNC DMA attribute.  Instead
@@ -117,13 +107,13 @@ static int i40evf_get_link_ksettings(struct net_device *netdev,
  * @netdev: network interface device structure
  * @sset: id of string set
  *
- * Reports size of string table. This driver only supports
- * strings for statistics.
+ * Reports size of various string tables.
  **/
 static int i40evf_get_sset_count(struct net_device *netdev, int sset)
 {
 	if (sset == ETH_SS_STATS)
-		return I40EVF_STATS_LEN(netdev);
+		return I40EVF_STATS_LEN +
+			(I40EVF_QUEUE_STATS_LEN * 2 * I40EVF_MAX_REQ_QUEUES);
 	else if (sset == ETH_SS_PRIV_FLAGS)
 		return I40EVF_PRIV_FLAGS_STR_LEN;
 	else
@@ -142,20 +132,66 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev,
 				     struct ethtool_stats *stats, u64 *data)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	unsigned int i, j;
-	char *p;
+	unsigned int i;
+
+	i40e_add_ethtool_stats(&data, adapter, i40evf_gstrings_stats);
+
+	rcu_read_lock();
+	for (i = 0; i < I40EVF_MAX_REQ_QUEUES; i++) {
+		struct i40e_ring *ring;
 
-	for (i = 0; i < I40EVF_GLOBAL_STATS_LEN; i++) {
-		p = (char *)adapter + i40evf_gstrings_stats[i].stat_offset;
-		data[i] =  *(u64 *)p;
+		/* Avoid accessing un-allocated queues */
+		ring = (i < adapter->num_active_queues ?
+			&adapter->tx_rings[i] : NULL);
+		i40evf_add_queue_stats(&data, ring);
+
+		/* Avoid accessing un-allocated queues */
+		ring = (i < adapter->num_active_queues ?
+			&adapter->rx_rings[i] : NULL);
+		i40evf_add_queue_stats(&data, ring);
 	}
-	for (j = 0; j < adapter->num_active_queues; j++) {
-		data[i++] = adapter->tx_rings[j].stats.packets;
-		data[i++] = adapter->tx_rings[j].stats.bytes;
+	rcu_read_unlock();
+}
+
+/**
+ * i40evf_get_priv_flag_strings - Get private flag strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the private flags string table
+ **/
+static void i40evf_get_priv_flag_strings(struct net_device *netdev, u8 *data)
+{
+	unsigned int i;
+
+	for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
+		snprintf(data, ETH_GSTRING_LEN, "%s",
+			 i40evf_gstrings_priv_flags[i].flag_string);
+		data += ETH_GSTRING_LEN;
 	}
-	for (j = 0; j < adapter->num_active_queues; j++) {
-		data[i++] = adapter->rx_rings[j].stats.packets;
-		data[i++] = adapter->rx_rings[j].stats.bytes;
+}
+
+/**
+ * i40evf_get_stat_strings - Get stat strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the statistics string table
+ **/
+static void i40evf_get_stat_strings(struct net_device *netdev, u8 *data)
+{
+	unsigned int i;
+
+	i40e_add_stat_strings(&data, i40evf_gstrings_stats);
+
+	/* Queues are always allocated in pairs, so we just use num_tx_queues
+	 * for both Tx and Rx queues.
+	 */
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "tx", i);
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "rx", i);
 	}
 }
 
@@ -165,38 +201,19 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev,
  * @sset: id of string set
  * @data: buffer for string data
  *
- * Builds stats string table.
+ * Builds string tables for various string sets
  **/
 static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
 {
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	u8 *p = data;
-	int i;
-
-	if (sset == ETH_SS_STATS) {
-		for (i = 0; i < (int)I40EVF_GLOBAL_STATS_LEN; i++) {
-			memcpy(p, i40evf_gstrings_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < adapter->num_active_queues; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "tx-%u.packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx-%u.bytes", i);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < adapter->num_active_queues; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "rx-%u.packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-%u.bytes", i);
-			p += ETH_GSTRING_LEN;
-		}
-	} else if (sset == ETH_SS_PRIV_FLAGS) {
-		for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 i40evf_gstrings_priv_flags[i].flag_string);
-			p += ETH_GSTRING_LEN;
-		}
+	switch (sset) {
+	case ETH_SS_STATS:
+		i40evf_get_stat_strings(netdev, data);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		i40evf_get_priv_flag_strings(netdev, data);
+		break;
+	default:
+		break;
 	}
 }
 
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 10/15] i40e: report correct statistics when XDP is enabled
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem
  Cc: Björn Töpel, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Björn Töpel <bjorn.topel@intel.com>

When XDP is enabled, the driver will report incorrect
statistics. Received frames will reported as transmitted frames.

This commits fixes the i40e implementation of ndo_get_stats64 (struct
net_device_ops), so that iproute2 will report correct statistics
(e.g. when running "ip -stats link show dev eth0") even when XDP is
enabled.

Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Fixes: 74608d17fe29 ("i40e: add support for XDP_TX action")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 24 +++++++++++----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 62f2b5bce6bb..6cb4076e8fba 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -420,9 +420,9 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev,
 				  struct rtnl_link_stats64 *stats)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_ring *tx_ring, *rx_ring;
 	struct i40e_vsi *vsi = np->vsi;
 	struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi);
+	struct i40e_ring *ring;
 	int i;
 
 	if (test_bit(__I40E_VSI_DOWN, vsi->state))
@@ -436,24 +436,26 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev,
 		u64 bytes, packets;
 		unsigned int start;
 
-		tx_ring = READ_ONCE(vsi->tx_rings[i]);
-		if (!tx_ring)
+		ring = READ_ONCE(vsi->tx_rings[i]);
+		if (!ring)
 			continue;
-		i40e_get_netdev_stats_struct_tx(tx_ring, stats);
+		i40e_get_netdev_stats_struct_tx(ring, stats);
 
-		rx_ring = &tx_ring[1];
+		if (i40e_enabled_xdp_vsi(vsi)) {
+			ring++;
+			i40e_get_netdev_stats_struct_tx(ring, stats);
+		}
 
+		ring++;
 		do {
-			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
-			packets = rx_ring->stats.packets;
-			bytes   = rx_ring->stats.bytes;
-		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
+			start   = u64_stats_fetch_begin_irq(&ring->syncp);
+			packets = ring->stats.packets;
+			bytes   = ring->stats.bytes;
+		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
 		stats->rx_packets += packets;
 		stats->rx_bytes   += bytes;
 
-		if (i40e_enabled_xdp_vsi(vsi))
-			i40e_get_netdev_stats_struct_tx(&rx_ring[1], stats);
 	}
 	rcu_read_unlock();
 
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 12/15] i40evf: Don't enable vlan stripping when rx offload is turned on
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Patryk Małek, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Patryk Małek <patryk.malek@intel.com>

With current implementation of i40evf_set_features when user sets
any offload via ethtool we set I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING
as a required aq which triggers driver to call
i40evf_enable_vlan_stripping. This shouldn't take place.
This patches fixes it by setting the flag only when VLAN offload
is turned on.

Signed-off-by: Patryk Małek <patryk.malek@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 07f187b5bcac..c7048cf484fb 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -3120,18 +3120,19 @@ static int i40evf_set_features(struct net_device *netdev,
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 
-	/* Don't allow changing VLAN_RX flag when VLAN is set for VF
-	 * and return an error in this case
+	/* Don't allow changing VLAN_RX flag when adapter is not capable
+	 * of VLAN offload
 	 */
-	if (VLAN_ALLOWED(adapter)) {
+	if (!VLAN_ALLOWED(adapter)) {
+		if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX)
+			return -EINVAL;
+	} else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
 		if (features & NETIF_F_HW_VLAN_CTAG_RX)
 			adapter->aq_required |=
 				I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
 		else
 			adapter->aq_required |=
 				I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
-	} else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
-		return -EINVAL;
 	}
 
 	return 0;
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 02/15] i40e: move ethtool stats boiler plate code to i40e_ethtool_stats.h
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Move the boiler plate structures and helper functions we recently
added into their own header file, so that the complete collection is
located together.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
v2: Based on feedback from David Miller, inline the
    __i40e_add_stat_strings function

 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 182 +--------------
 .../ethernet/intel/i40e/i40e_ethtool_stats.h  | 221 ++++++++++++++++++
 2 files changed, 222 insertions(+), 181 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/i40e/i40e_ethtool_stats.h

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 235012b3bd42..d7d3974beca2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -6,25 +6,8 @@
 #include "i40e.h"
 #include "i40e_diag.h"
 
-struct i40e_stats {
-	/* The stat_string is expected to be a format string formatted using
-	 * vsnprintf by i40e_add_stat_strings. Every member of a stats array
-	 * should use the same format specifiers as they will be formatted
-	 * using the same variadic arguments.
-	 */
-	char stat_string[ETH_GSTRING_LEN];
-	int sizeof_stat;
-	int stat_offset;
-};
-
-#define I40E_STAT(_type, _name, _stat) { \
-	.stat_string = _name, \
-	.sizeof_stat = FIELD_SIZEOF(_type, _stat), \
-	.stat_offset = offsetof(_type, _stat) \
-}
+#include "i40e_ethtool_stats.h"
 
-#define I40E_NETDEV_STAT(_net_stat) \
-	I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat)
 #define I40E_PF_STAT(_name, _stat) \
 	I40E_STAT(struct i40e_pf, _name, _stat)
 #define I40E_VSI_STAT(_name, _stat) \
@@ -173,11 +156,6 @@ static const struct i40e_stats i40e_gstrings_pfc_stats[] = {
 	I40E_PFC_STAT("port.rx_priority_%u_xon_2_xoff", priority_xon_2_xoff),
 };
 
-static const struct i40e_stats i40e_gstrings_queue_stats[] = {
-	I40E_QUEUE_STAT("%s-%u.packets", stats.packets),
-	I40E_QUEUE_STAT("%s-%u.bytes", stats.bytes),
-};
-
 #define I40E_NETDEV_STATS_LEN	ARRAY_SIZE(i40e_gstrings_net_stats)
 
 #define I40E_MISC_STATS_LEN	ARRAY_SIZE(i40e_gstrings_misc_stats)
@@ -1749,128 +1727,6 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset)
 	}
 }
 
-/**
- * i40e_add_one_ethtool_stat - copy the stat into the supplied buffer
- * @data: location to store the stat value
- * @pointer: basis for where to copy from
- * @stat: the stat definition
- *
- * Copies the stat data defined by the pointer and stat structure pair into
- * the memory supplied as data. Used to implement i40e_add_ethtool_stats and
- * i40e_add_queue_stats. If the pointer is null, data will be zero'd.
- */
-static inline void
-i40e_add_one_ethtool_stat(u64 *data, void *pointer,
-			  const struct i40e_stats *stat)
-{
-	char *p;
-
-	if (!pointer) {
-		/* ensure that the ethtool data buffer is zero'd for any stats
-		 * which don't have a valid pointer.
-		 */
-		*data = 0;
-		return;
-	}
-
-	p = (char *)pointer + stat->stat_offset;
-	switch (stat->sizeof_stat) {
-	case sizeof(u64):
-		*data = *((u64 *)p);
-		break;
-	case sizeof(u32):
-		*data = *((u32 *)p);
-		break;
-	case sizeof(u16):
-		*data = *((u16 *)p);
-		break;
-	case sizeof(u8):
-		*data = *((u8 *)p);
-		break;
-	default:
-		WARN_ONCE(1, "unexpected stat size for %s",
-			  stat->stat_string);
-		*data = 0;
-	}
-}
-
-/**
- * __i40e_add_ethtool_stats - copy stats into the ethtool supplied buffer
- * @data: ethtool stats buffer
- * @pointer: location to copy stats from
- * @stats: array of stats to copy
- * @size: the size of the stats definition
- *
- * Copy the stats defined by the stats array using the pointer as a base into
- * the data buffer supplied by ethtool. Updates the data pointer to point to
- * the next empty location for successive calls to __i40e_add_ethtool_stats.
- * If pointer is null, set the data values to zero and update the pointer to
- * skip these stats.
- **/
-static inline void
-__i40e_add_ethtool_stats(u64 **data, void *pointer,
-			 const struct i40e_stats stats[],
-			 const unsigned int size)
-{
-	unsigned int i;
-
-	for (i = 0; i < size; i++)
-		i40e_add_one_ethtool_stat((*data)++, pointer, &stats[i]);
-}
-
-/**
- * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer
- * @data: ethtool stats buffer
- * @pointer: location where stats are stored
- * @stats: static const array of stat definitions
- *
- * Macro to ease the use of __i40e_add_ethtool_stats by taking a static
- * constant stats array and passing the ARRAY_SIZE(). This avoids typos by
- * ensuring that we pass the size associated with the given stats array.
- * Assumes that stats is an array.
- **/
-#define i40e_add_ethtool_stats(data, pointer, stats) \
-	__i40e_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
-
-/**
- * i40e_add_queue_stats - copy queue statistics into supplied buffer
- * @data: ethtool stats buffer
- * @ring: the ring to copy
- *
- * Queue statistics must be copied while protected by
- * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats.
- * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the
- * ring pointer is null, zero out the queue stat values and update the data
- * pointer. Otherwise safely copy the stats from the ring into the supplied
- * buffer and update the data pointer when finished.
- *
- * This function expects to be called while under rcu_read_lock().
- **/
-static inline void
-i40e_add_queue_stats(u64 **data, struct i40e_ring *ring)
-{
-	const unsigned int size = ARRAY_SIZE(i40e_gstrings_queue_stats);
-	const struct i40e_stats *stats = i40e_gstrings_queue_stats;
-	unsigned int start;
-	unsigned int i;
-
-	/* To avoid invalid statistics values, ensure that we keep retrying
-	 * the copy until we get a consistent value according to
-	 * u64_stats_fetch_retry_irq. But first, make sure our ring is
-	 * non-null before attempting to access its syncp.
-	 */
-	do {
-		start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp);
-		for (i = 0; i < size; i++) {
-			i40e_add_one_ethtool_stat(&(*data)[i], ring,
-						  &stats[i]);
-		}
-	} while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start));
-
-	/* Once we successfully copy the stats in, update the data pointer */
-	data += size;
-}
-
 /**
  * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure
  * @pf: the PF device structure
@@ -1965,42 +1821,6 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 		  "ethtool stats count mismatch!");
 }
 
-/**
- * __i40e_add_stat_strings - copy stat strings into ethtool buffer
- * @p: ethtool supplied buffer
- * @stats: stat definitions array
- * @size: size of the stats array
- *
- * Format and copy the strings described by stats into the buffer pointed at
- * by p.
- **/
-static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[],
-				    const unsigned int size, ...)
-{
-	unsigned int i;
-
-	for (i = 0; i < size; i++) {
-		va_list args;
-
-		va_start(args, size);
-		vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
-		*p += ETH_GSTRING_LEN;
-		va_end(args);
-	}
-}
-
-/**
- * 40e_add_stat_strings - copy stat strings into ethtool buffer
- * @p: ethtool supplied buffer
- * @stats: stat definitions array
- *
- * Format and copy the strings described by the const static stats value into
- * the buffer pointed at by p. Assumes that stats can have ARRAY_SIZE called
- * for it.
- **/
-#define i40e_add_stat_strings(p, stats, ...) \
-	__i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
-
 /**
  * i40e_get_stat_strings - copy stat strings into supplied buffer
  * @netdev: the netdev to collect strings for
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool_stats.h b/drivers/net/ethernet/intel/i40e/i40e_ethtool_stats.h
new file mode 100644
index 000000000000..bba1cb0b658f
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool_stats.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+/* ethtool statistics helpers */
+
+/**
+ * struct i40e_stats - definition for an ethtool statistic
+ * @stat_string: statistic name to display in ethtool -S output
+ * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64)
+ * @stat_offset: offsetof() the stat from a base pointer
+ *
+ * This structure defines a statistic to be added to the ethtool stats buffer.
+ * It defines a statistic as offset from a common base pointer. Stats should
+ * be defined in constant arrays using the I40E_STAT macro, with every element
+ * of the array using the same _type for calculating the sizeof_stat and
+ * stat_offset.
+ *
+ * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or
+ * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from
+ * the i40e_add_ethtool_stat() helper function.
+ *
+ * The @stat_string is interpreted as a format string, allowing formatted
+ * values to be inserted while looping over multiple structures for a given
+ * statistics array. Thus, every statistic string in an array should have the
+ * same type and number of format specifiers, to be formatted by variadic
+ * arguments to the i40e_add_stat_string() helper function.
+ **/
+struct i40e_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+/* Helper macro to define an i40e_stat structure with proper size and type.
+ * Use this when defining constant statistics arrays. Note that @_type expects
+ * only a type name and is used multiple times.
+ */
+#define I40E_STAT(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+/* Helper macro for defining some statistics directly copied from the netdev
+ * stats structure.
+ */
+#define I40E_NETDEV_STAT(_net_stat) \
+	I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat)
+
+/* Helper macro for defining some statistics related to queues */
+#define I40E_QUEUE_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_ring, _name, _stat)
+
+/* Stats associated with a Tx or Rx ring */
+static const struct i40e_stats i40e_gstrings_queue_stats[] = {
+	I40E_QUEUE_STAT("%s-%u.packets", stats.packets),
+	I40E_QUEUE_STAT("%s-%u.bytes", stats.bytes),
+};
+
+/**
+ * i40e_add_one_ethtool_stat - copy the stat into the supplied buffer
+ * @data: location to store the stat value
+ * @pointer: basis for where to copy from
+ * @stat: the stat definition
+ *
+ * Copies the stat data defined by the pointer and stat structure pair into
+ * the memory supplied as data. Used to implement i40e_add_ethtool_stats and
+ * i40e_add_queue_stats. If the pointer is null, data will be zero'd.
+ */
+static inline void
+i40e_add_one_ethtool_stat(u64 *data, void *pointer,
+			  const struct i40e_stats *stat)
+{
+	char *p;
+
+	if (!pointer) {
+		/* ensure that the ethtool data buffer is zero'd for any stats
+		 * which don't have a valid pointer.
+		 */
+		*data = 0;
+		return;
+	}
+
+	p = (char *)pointer + stat->stat_offset;
+	switch (stat->sizeof_stat) {
+	case sizeof(u64):
+		*data = *((u64 *)p);
+		break;
+	case sizeof(u32):
+		*data = *((u32 *)p);
+		break;
+	case sizeof(u16):
+		*data = *((u16 *)p);
+		break;
+	case sizeof(u8):
+		*data = *((u8 *)p);
+		break;
+	default:
+		WARN_ONCE(1, "unexpected stat size for %s",
+			  stat->stat_string);
+		*data = 0;
+	}
+}
+
+/**
+ * __i40e_add_ethtool_stats - copy stats into the ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location to copy stats from
+ * @stats: array of stats to copy
+ * @size: the size of the stats definition
+ *
+ * Copy the stats defined by the stats array using the pointer as a base into
+ * the data buffer supplied by ethtool. Updates the data pointer to point to
+ * the next empty location for successive calls to __i40e_add_ethtool_stats.
+ * If pointer is null, set the data values to zero and update the pointer to
+ * skip these stats.
+ **/
+static inline void
+__i40e_add_ethtool_stats(u64 **data, void *pointer,
+			 const struct i40e_stats stats[],
+			 const unsigned int size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		i40e_add_one_ethtool_stat((*data)++, pointer, &stats[i]);
+}
+
+/**
+ * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location where stats are stored
+ * @stats: static const array of stat definitions
+ *
+ * Macro to ease the use of __i40e_add_ethtool_stats by taking a static
+ * constant stats array and passing the ARRAY_SIZE(). This avoids typos by
+ * ensuring that we pass the size associated with the given stats array.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided.
+ **/
+#define i40e_add_ethtool_stats(data, pointer, stats) \
+	__i40e_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
+
+/**
+ * i40e_add_queue_stats - copy queue statistics into supplied buffer
+ * @data: ethtool stats buffer
+ * @ring: the ring to copy
+ *
+ * Queue statistics must be copied while protected by
+ * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats.
+ * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the
+ * ring pointer is null, zero out the queue stat values and update the data
+ * pointer. Otherwise safely copy the stats from the ring into the supplied
+ * buffer and update the data pointer when finished.
+ *
+ * This function expects to be called while under rcu_read_lock().
+ **/
+static inline void
+i40e_add_queue_stats(u64 **data, struct i40e_ring *ring)
+{
+	const unsigned int size = ARRAY_SIZE(i40e_gstrings_queue_stats);
+	const struct i40e_stats *stats = i40e_gstrings_queue_stats;
+	unsigned int start;
+	unsigned int i;
+
+	/* To avoid invalid statistics values, ensure that we keep retrying
+	 * the copy until we get a consistent value according to
+	 * u64_stats_fetch_retry_irq. But first, make sure our ring is
+	 * non-null before attempting to access its syncp.
+	 */
+	do {
+		start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp);
+		for (i = 0; i < size; i++) {
+			i40e_add_one_ethtool_stat(&(*data)[i], ring,
+						  &stats[i]);
+		}
+	} while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start));
+
+	/* Once we successfully copy the stats in, update the data pointer */
+	*data += size;
+}
+
+/**
+ * __i40e_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @size: size of the stats array
+ *
+ * Format and copy the strings described by stats into the buffer pointed at
+ * by p.
+ **/
+static inline void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[],
+				    const unsigned int size, ...)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++) {
+		va_list args;
+
+		va_start(args, size);
+		vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
+		*p += ETH_GSTRING_LEN;
+		va_end(args);
+	}
+}
+
+/**
+ * 40e_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ *
+ * Format and copy the strings described by the const static stats value into
+ * the buffer pointed at by p.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided. Additionally, stats must be an array such that
+ * ARRAY_SIZE can be called on it.
+ **/
+#define i40e_add_stat_strings(p, stats, ...) \
+	__i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 14/15] i40evf: cancel workqueue sync for adminq when a VF is removed
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Lihong Yang, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Lihong Yang <lihong.yang@intel.com>

If a VF is being removed, there is no need to continue with the
workqueue sync for the adminq task, thus cancel it. Without this call,
when VFs are created and removed right away, there might be a chance for
the driver to crash with events stuck in the adminq.

Signed-off-by: Lihong Yang <lihong.yang@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index c7048cf484fb..174d1da2857b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -3910,6 +3910,8 @@ static void i40evf_remove(struct pci_dev *pdev)
 	if (adapter->watchdog_timer.function)
 		del_timer_sync(&adapter->watchdog_timer);
 
+	cancel_work_sync(&adapter->adminq_task);
+
 	i40evf_free_rss(adapter);
 
 	if (hw->aq.asq.count)
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 04/15] i40evf: Change a VF mac without reloading the VF driver
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem
  Cc: Paweł Jabłoński, netdev, nhorman, sassmann,
	jogreene, Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Paweł Jabłoński <pawel.jablonski@intel.com>

Add possibility to change a VF mac address from host side
without reloading the VF driver on the guest side. Without
this patch it is not possible to change the VF mac because
executing i40evf_virtchnl_completion function with
VIRTCHNL_OP_GET_VF_RESOURCES opcode resets the VF mac
address to previous value.

Signed-off-by: Paweł Jabłoński <pawel.jablonski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c  |  8 +++++---
 drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 11 +++++++++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index c6d24eaede18..f56c645588f3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2458,7 +2458,7 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
 		    !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
 		    !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
 			dev_err(&pf->pdev->dev,
-				"VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
+				"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
 			return -EPERM;
 		}
 	}
@@ -3873,9 +3873,11 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 			 mac, vf_id);
 	}
 
-	/* Force the VF driver stop so it has to reload with new MAC address */
+	/* Force the VF interface down so it has to bring up with new MAC
+	 * address
+	 */
 	i40e_vc_disable_vf(vf);
-	dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n");
+	dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n");
 
 error_param:
 	return ret;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 565677de5ba3..79b7be83b5c3 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1330,8 +1330,15 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 			  sizeof(struct virtchnl_vsi_resource);
 		memcpy(adapter->vf_res, msg, min(msglen, len));
 		i40e_vf_parse_hw_config(&adapter->hw, adapter->vf_res);
-		/* restore current mac address */
-		ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+		if (is_zero_ether_addr(adapter->hw.mac.addr)) {
+			/* restore current mac address */
+			ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+		} else {
+			/* refresh current mac address if changed */
+			ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+			ether_addr_copy(netdev->perm_addr,
+					adapter->hw.mac.addr);
+		}
 		i40evf_process_config(adapter);
 		}
 		break;
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 07/15] i40evf: set IFF_UNICAST_FLT flag for the VF
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Lihong Yang, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Lihong Yang <lihong.yang@intel.com>

Set IFF_UNICAST_FLT flag for the VF to prevent it from entering
promiscuous mode when macvlan is added to the VF.

Signed-off-by: Lihong Yang <lihong.yang@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 5906c1c1d19d..07f187b5bcac 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -3358,6 +3358,8 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
 	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
 		netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
 	/* Do not turn on offloads when they are requested to be turned off.
 	 * TSO needs minimum 576 bytes to work correctly.
 	 */
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 13/15] i40e: hold the rtnl lock on clearing interrupt scheme
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Patryk Małek, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Patryk Małek <patryk.malek@intel.com>

Hold the rtnl lock when we're clearing interrupt scheme
in i40e_shutdown and in i40e_remove.

Signed-off-by: Patryk Małek <patryk.malek@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index c30feb27e1c0..112245f32d7d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -14182,6 +14182,7 @@ static void i40e_remove(struct pci_dev *pdev)
 	mutex_destroy(&hw->aq.asq_mutex);
 
 	/* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
+	rtnl_lock();
 	i40e_clear_interrupt_scheme(pf);
 	for (i = 0; i < pf->num_alloc_vsi; i++) {
 		if (pf->vsi[i]) {
@@ -14190,6 +14191,7 @@ static void i40e_remove(struct pci_dev *pdev)
 			pf->vsi[i] = NULL;
 		}
 	}
+	rtnl_unlock();
 
 	for (i = 0; i < I40E_MAX_VEB; i++) {
 		kfree(pf->veb[i]);
@@ -14401,7 +14403,13 @@ static void i40e_shutdown(struct pci_dev *pdev)
 	wr32(hw, I40E_PFPM_WUFC,
 	     (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
 
+	/* Since we're going to destroy queues during the
+	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
+	 * whole section
+	 */
+	rtnl_lock();
 	i40e_clear_interrupt_scheme(pf);
+	rtnl_unlock();
 
 	if (system_state == SYSTEM_POWER_OFF) {
 		pci_wake_from_d3(pdev, pf->wol_en);
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 11/15] i40e: Check and correct speed values for link on open
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Jan Sokolowski, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Jan Sokolowski <jan.sokolowski@intel.com>

If our card has been put in an unstable state due to
other drivers interacting with it, speed settings
might be incorrect. If incorrect, forcefully reset them
on open to known default values.

Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 27 ++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 6cb4076e8fba..c30feb27e1c0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6570,6 +6570,24 @@ static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
 	struct i40e_hw *hw = &pf->hw;
 	i40e_status err;
 	u64 mask;
+	u8 speed;
+
+	/* Card might've been put in an unstable state by other drivers
+	 * and applications, which causes incorrect speed values being
+	 * set on startup. In order to clear speed registers, we call
+	 * get_phy_capabilities twice, once to get initial state of
+	 * available speeds, and once to get current PHY config.
+	 */
+	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities,
+					   NULL);
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"failed to get phy cap., ret =  %s last_status =  %s\n",
+			i40e_stat_str(hw, err),
+			i40e_aq_str(hw, hw->aq.asq_last_status));
+		return err;
+	}
+	speed = abilities.link_speed;
 
 	/* Get the current phy config */
 	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
@@ -6583,9 +6601,9 @@ static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
 	}
 
 	/* If link needs to go up, but was not forced to go down,
-	 * no need for a flap
+	 * and its speed values are OK, no need for a flap
 	 */
-	if (is_up && abilities.phy_type != 0)
+	if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0)
 		return I40E_SUCCESS;
 
 	/* To force link we need to set bits for all supported PHY types,
@@ -6597,7 +6615,10 @@ static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
 	config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
 	/* Copy the old settings, except of phy_type */
 	config.abilities = abilities.abilities;
-	config.link_speed = abilities.link_speed;
+	if (abilities.link_speed != 0)
+		config.link_speed = abilities.link_speed;
+	else
+		config.link_speed = speed;
 	config.eee_capability = abilities.eee_capability;
 	config.eeer = abilities.eeer_val;
 	config.low_power_ctrl = abilities.d3_lpan;
-- 
2.17.1

^ permalink raw reply related

* [net-next v2 15/15] i40e: Prevent deleting MAC address from VF when set by PF
From: Jeff Kirsher @ 2018-08-30 21:11 UTC (permalink / raw)
  To: davem; +Cc: Patryk Małek, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180830211147.17008-1-jeffrey.t.kirsher@intel.com>

From: Patryk Małek <patryk.malek@intel.com>

To prevent VF from deleting MAC address that was assigned by the
PF we need to check for that scenario when we try to delete a MAC
address from a VF.

Signed-off-by: Patryk Małek <patryk.malek@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index f56c645588f3..3e707c7e6782 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2569,6 +2569,16 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 			ret = I40E_ERR_INVALID_MAC_ADDR;
 			goto error_param;
 		}
+
+		if (vf->pf_set_mac &&
+		    ether_addr_equal(al->list[i].addr,
+				     vf->default_lan_addr.addr)) {
+			dev_err(&pf->pdev->dev,
+				"MAC addr %pM has been set by PF, cannot delete it for VF %d, reset VF to change MAC addr\n",
+				vf->default_lan_addr.addr, vf->vf_id);
+			ret = I40E_ERR_PARAM;
+			goto error_param;
+		}
 	}
 	vsi = pf->vsi[vf->lan_vsi_idx];
 
-- 
2.17.1

^ permalink raw reply related

* [PATCH net] net/ipv6: Only update MTU metric if it set
From: dsahern @ 2018-08-30 21:15 UTC (permalink / raw)
  To: netdev; +Cc: medhefgo, David Ahern

From: David Ahern <dsahern@gmail.com>

Jan reported a regression after an update to 4.18.5. In this case ipv6
default route is setup by systemd-networkd based on data from an RA. The
RA contains an MTU of 1492 which is used when the route is first inserted
but then systemd-networkd pushes down updates to the default route
without the mtu set.

Prior to the change to fib6_info, metrics such as MTU were held in the
dst_entry and rt6i_pmtu in rt6_info contained an update to the mtu if
any. ip6_mtu would look at rt6i_pmtu first and use it if set. If not,
the value from the metrics is used if it is set and finally falling
back to the idev value.

After the fib6_info change metrics are contained in the fib6_info struct
and there is no equivalent to rt6i_pmtu. To maintain consistency with
the old behavior the new code should only reset the MTU in the metrics
if the route update has it set.

Fixes: d4ead6b34b67 ("net/ipv6: move metrics from dst to rt6_info")
Reported-by: Jan Janssen <medhefgo@web.de>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/ip6_fib.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d212738e9d10..f43d278e0040 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -987,7 +987,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 					fib6_clean_expires(iter);
 				else
 					fib6_set_expires(iter, rt->expires);
-				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
+
+				if (rt->fib6_pmtu)
+					fib6_metric_set(iter, RTAX_MTU,
+							rt->fib6_pmtu);
 				return -EEXIST;
 			}
 			/* If we have the same destination and the same metric,
-- 
2.15.2 (Apple Git-101.1)

^ permalink raw reply related

* Re: KMSAN: uninit-value in rds_connect
From: syzbot @ 2018-08-31  1:25 UTC (permalink / raw)
  To: davem, linux-kernel, linux-rdma, netdev, rds-devel,
	santosh.shilimkar, syzkaller-bugs
In-Reply-To: <00000000000005859b0574ab47e5@google.com>

syzbot has found a reproducer for the following crash on:

HEAD commit:    25114c64b719 kmsan: unpoison pages moved from iov_iter to ..
git tree:       https://github.com/google/kmsan.git/master
console output: https://syzkaller.appspot.com/x/log.txt?x=14465da6400000
kernel config:  https://syzkaller.appspot.com/x/.config?x=3431f03869413153
dashboard link: https://syzkaller.appspot.com/bug?extid=0049bebbf3042dbd2e8f
compiler:       clang version 8.0.0 (trunk 339414)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=13b8513e400000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=10d64e96400000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+0049bebbf3042dbd2e8f@syzkaller.appspotmail.com

random: sshd: uninitialized urandom read (32 bytes read)
random: sshd: uninitialized urandom read (32 bytes read)
random: sshd: uninitialized urandom read (32 bytes read)
random: sshd: uninitialized urandom read (32 bytes read)
==================================================================
BUG: KMSAN: uninit-value in rds_connect+0x213/0x950 net/rds/af_rds.c:511
CPU: 0 PID: 4780 Comm: syz-executor239 Not tainted 4.19.0-rc1+ #38
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x14b/0x190 lib/dump_stack.c:113
  kmsan_report+0x183/0x2b0 mm/kmsan/kmsan.c:956
  __msan_warning+0x70/0xc0 mm/kmsan/kmsan_instr.c:645
  rds_connect+0x213/0x950 net/rds/af_rds.c:511
  __sys_connect+0x64e/0x750 net/socket.c:1662
  __do_sys_connect net/socket.c:1673 [inline]
  __se_sys_connect net/socket.c:1670 [inline]
  __x64_sys_connect+0xd8/0x120 net/socket.c:1670
  do_syscall_64+0x15b/0x220 arch/x86/entry/common.c:290
  entry_SYSCALL_64_after_hwframe+0x63/0xe7
RIP: 0033:0x440199
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7  
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff  
ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fff5f41c118 EFLAGS: 00000213 ORIG_RAX: 000000000000002a
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440199
RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8
R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000401a20
R13: 0000000000401ab0 R14: 0000000000000000 R15: 0000000000000000

Local variable description: ----address@__sys_connect
Variable was created at:
  __sys_connect+0x6a/0x750 net/socket.c:1645
  __do_sys_connect net/socket.c:1673 [inline]
  __se_sys_connect net/socket.c:1670 [inline]
  __x64_sys_connect+0xd8/0x120 net/socket.c:1670
==================================================================

^ permalink raw reply

* Re: [RFC PATCH v1 0/3] device property: Support MAC address in VPD
From: Brian Norris @ 2018-08-31  1:26 UTC (permalink / raw)
  To: Rob Herring
  Cc: Brian Norris, Florian Fainelli, Greg Kroah-Hartman,
	Rafael J. Wysocki, Andrew Lunn, Dmitry Torokhov, Guenter Roeck,
	netdev, devicetree, linux-kernel, Julius Werner, Stephen Boyd,
	Srinivas Kandagatla
In-Reply-To: <20180815221601.GB24830@rob-hp-laptop>

Hi, sorry for the delay, and thanks for the response.

I'm still not sure whether I'll pursue this series further, but I'd like
to at least narrow down what it'd look like.

On Wed, Aug 15, 2018 at 04:16:01PM -0600, Rob Herring wrote:
> On Tue, Aug 14, 2018 at 06:44:36PM -0700, Brian Norris wrote:
> > On Tue, Aug 14, 2018 at 05:52:49PM -0700, Florian Fainelli wrote:
> > > On 08/14/2018 05:22 PM, Brian Norris wrote:
> > > >> Also, aliases in DT are meant to provide some stability.
> > > > 
> > > > How, specifically? I don't see any relevant binding description for
> > > > aliases under Documentation/devicetree/bindings/net/.
> > > 
> > > Indeed they are not, likewise, we should probably update devicetree-spec
> > > to come up with standard names that of_alias_get_id() already consumes.
> > 
> > A quick grep shows we already have divergence: both "eth" and "ethernet"
> > are in use.
> 
> Uggg, it would be nice to clean that up. 

Is it fair to just change them, since they were never documented? Of
course, only after documenting the "right" way.

> There's several aliases I'd like to get rid of (some platforms went a 
> little crazy with them) and I'd like to start requiring alias names to 
> be documented. I created an issue for the spec. Patches welcomeTM. :)

So e.g., new text in Documentation/devicetree/bindings/net/ethernet.txt
and Documentation/devicetree/bindings/net/wireless/ieee80211.txt about
'aliases' entries? I can do that, especially if I give up on a new
binding like in this series.

> > But anyway, would the idea be that you just put 'ethernet{0,1,...}' and
> > 'wifi{0,1,...}' aliases in the /chosen node, then require boot firmware
> > to insert any {ethernet,wifi}_mac{0,1,...} into the paths represented by
> > the corresponding aliases?
> 
> In the /aliases node, but yes.

Oops, yes.

> Seems to me that nvmem needs to be extended to allow providers to 
> retrieve and interpret data. Not everything is at some fixed offset and 
> size. Something like this is valid dts:
> 
> nvmem = <&phandle> "a-string";
> 
> But that's pretty uncommon (I can't think of a binding that actually 
> uses that). Perhaps the provider has an array of keys defined and the 
> consumer just provides the index.

In the case of VPD, all keys are 0-terminated strings (there's also a
length field, but the key is still 0-terminated), so that scheme could
work. (I'm not sure an indexed provider is extremely relevant right now,
although it probably could be supported if I expand the of_nvmem
retrieval to support a generic of_xlate() override anyway.) The
information represented is almost the same as in my proposal, except that:
(a) now I have to give the VPD a phandle -- so far, I've avoided that,
    as it's just an auto-enumerated device underneath the
    /firmware/coreboot device (see drivers/firmware/google/vpd.c)
(b) this is no longer directly useful to ACPI systems -- I'm not
    actually sure how (if at all) nvmem provider/consumer is supposed to
    work there

But maybe this isn't really that useful to ACPI, and it's sufficient to
just have fwnode_get_mac_address() call of_get_nvmem_mac_address() when
we're using DT.

> Or we could do '<key>-nvmem = <&phandle>', but parsing that is a bit 
> more complicated.

That doesn't seem to have much advantage to me.

Brian

^ permalink raw reply

* Maintainer / Kernel Summit 2018 planning kick-off
From: Theodore Y. Ts'o @ 2018-08-30 21:35 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-mm, netdev, linux-block

[ Feel free to forward this to other Linux kernel mailing lists as
  appropriate -- Ted ]

This year, the Maintainer and Kernel Summit will be in Vancouver,
B.C., November 12th -- 15th.  The Maintainer's summit will be held on
Monday, November 12th, in Vancouver, immediately before the Linux
Plumber's Conference (LPC) November 13th -- 15th.

For the past few years, before 2017, we've scheduled mostly management
and development process issues on the first day.  We then opened up
the second day of the Kernel Summit to all attendees of the conference
with which the Kernel Summit has been colocated, and called it the
"Open Technical Day".  This is something that just made sense in order
to assure that all of the necessary people needed to discuss a
particular technical issue could be in the room.

Starting last year in Prague, we took the next logical step, and split
the Kernel Summit in two.  The "Maintainer's Summit" is an
invite-only, half-day event, where the primary focus will be process
issues of Linux Kernel Development.  It will be limited to 30 invitees
and a handful of sponsored attendees.  This makes it smaller than the
first few kernel summits (which were limited to around 50 attendees).

The "Kernel Summit" is now organized as a track which is run in
parallel with the other tracks at the Linux Plumber's Conference, and
is open to all registered attendees of Plumbers.  Much as how we
organized the Kernel Summit "open technical day" in 2016 in Santa Fe,
the Kernel Summit schedule will be synchronized with the other tracks
at the Plumber's Conference, and it will be open to all registered
Plumber's attendees.

Linus has suggested the following ten people as the core of the people
he would like invited to the Maintainer's Summit, which was calculated
from statistics from his git tree.

	David Miller
	Dave Airlie
	Greg KH
	Arnd Bergmann
	Ingo Molnar
	Mauro Carvalho Chehab
	Takashi Iwai
	Thomas Gleixner
	Andrew Morton
	Olof Johansson

As we did last year, there will be a mini-program committee that will
be pick enough names to bring the total number of 30 for the
Maintainer's Summit.  That program committee will consist of Arnd
Bergmann, Thomas Gleixner, Greg KH, Paul McKenney, and Ted Ts'o.

We will use the rest of names on the list generated by Linus's script
as a starting point of people to be considered.  People who suggest
topics that should be discussed on the Maintainer's summit will also
be added to the list.  To make topic suggestions for the Maintainer's
Summit, please send e-mail to the ksummit-discuss list with a subject
prefix of [MAINTAINERS SUMMIT].


The other job of the program committee will be to organize the program
for the Kernel Summit.  The goal of the Kernel Summit track will be to
provide a forum to discuss specific technical issues that would be
easier to resolve in person than over e-mail.  The program committee
will also consider "information sharing" topics if they are clearly of
interest to the wider development community (i.e., advanced training
in topics that would be useful to kernel developers).

To suggest a topic for the Kernel Summit, please tag your e-mail with
[TECH TOPIC].  As before, please use a separate e-mail for each topic,
and send the topic suggestions to:

	ksummit-discuss@lists.linuxfoundation.org

People who submit topic suggestions before September 21st and which
are accepted, will be given a free admission to the Linux Plumbers
Conference.

We will reserving roughly half the Kernel Summit slots for last-minute
discussions that will be scheduled during the week of Plumber's, in an
"unconference style".  This was extremely popular in Santa Fe and in
Prague, since it allowed ideas that came up in hallway discussions,
and in Plumber's Miniconference, to be given scheduled, dedicated
times for that discussion.


If you were not subscribed on to the kernel-discuss mailing list from
last year (or if you had removed yourself after the kernel summit),
you can subscribe to the discuss list using mailman:

   https://lists.linuxfoundation.org/mailman/listinfo/ksummit-discuss

^ permalink raw reply

* Re: [PATCH net] net/ipv6: Only update MTU metric if it set
From: Jan Janssen @ 2018-08-30 21:49 UTC (permalink / raw)
  To: dsahern; +Cc: netdev, David Ahern
In-Reply-To: <20180830211543.27111-1-dsahern@kernel.org>

On Donnerstag, 30. August 2018 23:15:43 CEST dsahern@kernel.org wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> Jan reported a regression after an update to 4.18.5. In this case ipv6
> default route is setup by systemd-networkd based on data from an RA. The
> RA contains an MTU of 1492 which is used when the route is first inserted
> but then systemd-networkd pushes down updates to the default route
> without the mtu set.
> 
> Prior to the change to fib6_info, metrics such as MTU were held in the
> dst_entry and rt6i_pmtu in rt6_info contained an update to the mtu if
> any. ip6_mtu would look at rt6i_pmtu first and use it if set. If not,
> the value from the metrics is used if it is set and finally falling
> back to the idev value.
> 
> After the fib6_info change metrics are contained in the fib6_info struct
> and there is no equivalent to rt6i_pmtu. To maintain consistency with
> the old behavior the new code should only reset the MTU in the metrics
> if the route update has it set.
> 
> Fixes: d4ead6b34b67 ("net/ipv6: move metrics from dst to rt6_info")
> Reported-by: Jan Janssen <medhefgo@web.de>
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  net/ipv6/ip6_fib.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
> index d212738e9d10..f43d278e0040 100644
> --- a/net/ipv6/ip6_fib.c
> +++ b/net/ipv6/ip6_fib.c
> @@ -987,7 +987,10 @@ static int fib6_add_rt2node(struct fib6_node *fn,
> struct fib6_info *rt, fib6_clean_expires(iter);
>  				else
>  					fib6_set_expires(iter, rt->expires);
> -				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
> +
> +				if (rt->fib6_pmtu)
> +					fib6_metric_set(iter, RTAX_MTU,
> +							rt->fib6_pmtu);
>  				return -EEXIST;
>  			}
>  			/* If we have the same destination and the same metric,

I just tested this and can confirm that it fixes my problem.

You mentioned that systemd-networkd is doing something silly here, so I was 
wondering if you could make a bug report to fix the underlying issue? I don't 
mind doing it myself but you're the expert and can explain things better.

Thank you for fixing this.

^ permalink raw reply

* Re: [PATCH 1/2] dt-bindings: net: cpsw: Document cpsw-phy-sel usage but prefer phandle
From: Tony Lindgren @ 2018-08-30 21:55 UTC (permalink / raw)
  To: Grygorii Strashko
  Cc: David Miller, netdev, linux-omap, devicetree, Andrew Lunn,
	Ivan Khoronzhuk, Mark Rutland, Murali Karicheri, Rob Herring
In-Reply-To: <69161960-259c-6911-67b5-546ccb433165@ti.com>

* Grygorii Strashko <grygorii.strashko@ti.com> [180830 17:08]:
> On 08/29/2018 07:47 PM, Tony Lindgren wrote:
> > In general, it seems cpsw is just an interconnect instance
> > (L4_FAST) with a control module (CPSW_WR) and a pile of
> > independent other modules. That's described nicely in
> > am437x TRM chapter "2.1.4 L4 Fast Peripheral Memory Map".
> > So from that point of view the binding reg entries right
> > now are all wrong :)
> 
> TRM not consistent - for am5 it's one MMIO region.

Well that same information is there in 57xx TRM in chapter
"Table 26-1454. GMAC_SW Instance Summary". But yeah, all
the cpsw internal devices are stuffed into a single
interconnect target module.

> > In the long run cpsw should be really treated as an
> > interconnect instance with it's control module providing
> > standard Linux framework services such as clock /
> > regulator / phy / pinctrl / iio whatever for the other
> > modules.
> > 
> > Just my 2c based on looking at the interconnect, I'm
> > not too familiar with cpsw otherwise.
> 
> It's not separate modules. this is composite module which have only one
> fck/ick and most of blocks can't even function without each other.
> Above might be the case for Keystone 2, but not omap CPSW.
> Keystone 2 - has packet processor, security accelerator, queue manager in
> addition to its basic switch block.

Yeah there's just one fck/ick as it's all in a single
interconnect module. But you might want to look at the
CPSW_WR device registers and see what gate clocks and other
Linux generic subsystem services CPSW_WR could provide for
the other cpsw internal devices. It might just make your
life easier maintaining all these variants ;)

Regards,

Tony

^ permalink raw reply

* Re: [PATCH net] net/ipv6: Only update MTU metric if it set
From: David Ahern @ 2018-08-30 22:13 UTC (permalink / raw)
  To: Jan Janssen; +Cc: netdev
In-Reply-To: <11723218.PRMC7IWbep@minako>

On 8/30/18 3:49 PM, Jan Janssen wrote:
> You mentioned that systemd-networkd is doing something silly here, so I was 
> wondering if you could make a bug report to fix the underlying issue? I don't 
> mind doing it myself but you're the expert and can explain things better.

It is silly in the sense that systemd-networkd installs a default route
with an MTU and then pushes down updates (route replace) without the MTU.

^ permalink raw reply

* [bpf PATCH] bpf: avoid kcm psock use and tcp_bpf from colliding
From: John Fastabend @ 2018-08-30 22:33 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev

Currently we check sk_user_data is non NULL to determine if the sk
exists in a map. However, this is not sufficient to ensure the psock
is not in use by another (non-ULP TCP) user, such as kcm. To avoid
this when adding a sock to a map also verify it is of the correct ULP
type.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 0 files changed

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index ce63e58..1c05794 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1808,6 +1808,11 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
 	return 0;
 }
 
+static bool psock_is_smap_sk(struct sock *sk)
+{
+	return inet_csk(sk)->icsk_ulp_ops == &bpf_tcp_ulp_ops;
+}
+
 /* Locking notes: Concurrent updates, deletes, and lookups are allowed and are
  * done inside rcu critical sections. This ensures on updates that the psock
  * will not be released via smap_release_sock() until concurrent updates/deletes
@@ -1892,6 +1897,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
 	 * doesn't update user data.
 	 */
 	if (psock) {
+		if (!psock_is_smap_sk(sock)) {
+			err = -EBUSY;
+			goto out_progs;
+		}
 		if (READ_ONCE(psock->bpf_parse) && parse) {
 			err = -EBUSY;
 			goto out_progs;

^ permalink raw reply related

* [RFC PATCH] bpf: test case for kcm + sockmap
From: John Fastabend @ 2018-08-30 22:39 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev

This finds the issue with kcm and sockmap where when a kcm socket is
added to a sockmap we fail to notice it is a kcm sock and not a tcp
bpf ULP sock. This results in a refcount_inc_not_zero() being called
on the psock which is assigned to a smap_psock incorrectly. On my
system it just happens to fail there and returns EAGAIN.

With the fix, just submitted for bpf tree, we get the correct EBUSY
error.

https://patchwork.ozlabs.org/project/netdev/list/?series=63393

RFC for now and will submit properly when bpf fix makes its way into
bpf-next. But, wanted to get the test on the list for folks to look
at.

---

John Fastabend (1):
      bpf: test_maps add a test to catch kcm + sockmap


 tools/testing/selftests/bpf/Makefile    |    2 -
 tools/testing/selftests/bpf/test_maps.c |   64 ++++++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 3 deletions(-)

^ permalink raw reply

* [RFC PATCH] bpf: test_maps add a test to catch kcm + sockmap
From: John Fastabend @ 2018-08-30 22:39 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev
In-Reply-To: <20180830223555.11553.24093.stgit@john-Precision-Tower-5810>

RFC for now, once the fix is in bpf-next will submit there.

Adding a socket to both sockmap and kcm is not supported due to
collision on sk_user_data usage. At some point we will fix this
but until then lets ensure we don't corrupt things.

Now if selftests is run without KCM support we will issue a
warning and continue with the tests.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 tools/testing/selftests/bpf/Makefile    |    2 -
 tools/testing/selftests/bpf/test_maps.c |   64 ++++++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fff7fb1..16f5dcc 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
 	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
 	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-	test_skb_cgroup_id_kern.o
+	test_skb_cgroup_id_kern.o sockmap_kcm.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 6f54f84..473b690 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -20,6 +20,7 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <linux/bpf.h>
+#include <linux/kcm.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
@@ -479,14 +480,16 @@ static void test_devmap(int task, void *data)
 #define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
 #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
 #define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
+#define KCM_PROG "./sockmap_kcm.o"
 static void test_sockmap(int tasks, void *data)
 {
 	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
-	int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
+	int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, kcm;
 	int ports[] = {50200, 50201, 50202, 50204};
 	int err, i, fd, udp, sfd[6] = {0xdeadbeef};
 	u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
-	int parse_prog, verdict_prog, msg_prog;
+	int parse_prog, verdict_prog, msg_prog, kcm_prog;
+	struct kcm_attach attach_info;
 	struct sockaddr_in addr;
 	int one = 1, s, sc, rc;
 	struct bpf_object *obj;
@@ -740,6 +743,62 @@ static void test_sockmap(int tasks, void *data)
 		goto out_sockmap;
 	}
 
+	/* Test adding a KCM socket into map */
+#define AF_KCM 41
+	kcm = socket(AF_KCM, SOCK_DGRAM, KCMPROTO_CONNECTED);
+	if (kcm == -1) {
+		printf("Warning, KCM+Sockmap could not be tested.\n");
+		goto skip_kcm;
+	}
+
+	err = bpf_prog_load(KCM_PROG,
+			    BPF_PROG_TYPE_SOCKET_FILTER,
+			    &obj, &kcm_prog);
+	if (err) {
+		printf("Failed to load SK_SKB parse prog\n");
+		goto out_sockmap;
+	}
+
+	i = 2;
+	memset(&attach_info, 0, sizeof(attach_info));
+	attach_info.fd = sfd[i];
+	attach_info.bpf_fd = kcm_prog;
+	err = ioctl(kcm, SIOCKCMATTACH, &attach_info);
+	if (!err) {
+		perror("Failed KCM attached to sockmap fd: ");
+		goto out_sockmap;
+	}
+
+	err = bpf_map_delete_elem(fd, &i);
+	if (err) {
+		printf("Failed delete sockmap from empty map %i %i\n", err, errno);
+		goto out_sockmap;
+	}
+
+	err = ioctl(kcm, SIOCKCMATTACH, &attach_info);
+	if (err) {
+		perror("Failed KCM attach");
+		goto out_sockmap;
+	}
+
+	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+	if (!err) {
+		printf("Failed sockmap attached KCM sock!\n");
+		goto out_sockmap;
+	}
+	err = ioctl(kcm, SIOCKCMUNATTACH, &attach_info);
+	if (err) {
+		printf("Failed detattch KCM sock!\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+	if (err) {
+		printf("Failed post-kcm update sockmap '%i:%i'\n",
+		       i, sfd[i]);
+		goto out_sockmap;
+	}
+
 	/* Test map update elem afterwards fd lives in fd and map_fd */
 	for (i = 0; i < 6; i++) {
 		err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
@@ -772,6 +831,7 @@ static void test_sockmap(int tasks, void *data)
 		}
 	}
 
+skip_kcm:
 	/* Put sfd[2] (sending fd below) into msg map to test sendmsg bpf */
 	i = 0;
 	err = bpf_map_update_elem(map_fd_msg, &i, &sfd[2], BPF_ANY);

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox