Netdev List
 help / color / mirror / Atom feed
* [net-next 14/40] ixgbe: configure minimal packet buffers to support TC
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: John Fastabend, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

ixgbe devices support different numbers of packet buffers either
8 or 4. Here we only allocate the minimal number of packet
buffers required to implement the net_devices number of traffic
classes.

Fewer traffic classes allows for larger packet buffers in
hardware. Also more Tx/Rx queues can be given to each
traffic class.

This patch is mostly about propagating the number of traffic
classes through the init path. Specifically this adds the 4TC
cases to the MRQC and MTQC setup routines. Also ixgbe_setup_tc()
was sanitized to handle other traffic class value.

Finally changing the number of packet buffers in the hardware
requires the device to reinit. So this moves the reinit work
from DCB into the main ixgbe_setup_tc() routine to consolidate
the reset code. Now dcbnl_xxx ops call ixgbe_setup_tc() to
configure packet buffers if needed.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_dcb_nl.c |   16 +---
 drivers/net/ixgbe/ixgbe_main.c   |  235 ++++++++++++++++++++++----------------
 drivers/net/ixgbe/ixgbe_type.h   |    1 +
 3 files changed, 138 insertions(+), 114 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index 293ff06..b229feb 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -125,9 +125,7 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 			goto out;
 		}
 
-		if (netif_running(netdev))
-			netdev->netdev_ops->ndo_stop(netdev);
-		ixgbe_clear_interrupt_scheme(adapter);
+		adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
 
 		switch (adapter->hw.mac.type) {
 		case ixgbe_mac_82598EB:
@@ -143,18 +141,12 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 			break;
 		}
 
-		adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
-		if (!netdev_get_num_tc(netdev))
-			ixgbe_setup_tc(netdev, MAX_TRAFFIC_CLASS);
+		ixgbe_setup_tc(netdev, MAX_TRAFFIC_CLASS);
 	} else {
 		/* Turn off DCB */
 		if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
 			goto out;
 
-		if (netif_running(netdev))
-			netdev->netdev_ops->ndo_stop(netdev);
-		ixgbe_clear_interrupt_scheme(adapter);
-
 		adapter->hw.fc.requested_mode = adapter->last_lfc_mode;
 		adapter->temp_dcb_cfg.pfc_mode_enable = false;
 		adapter->dcb_cfg.pfc_mode_enable = false;
@@ -167,13 +159,9 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 		default:
 			break;
 		}
-
 		ixgbe_setup_tc(netdev, 0);
 	}
 
-	ixgbe_init_interrupt_scheme(adapter);
-	if (netif_running(netdev))
-		netdev->netdev_ops->ndo_open(netdev);
 out:
 	return err;
 }
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 66fc94a..f6d8105 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2814,8 +2814,8 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 rttdcs;
-	u32 mask;
 	u32 reg;
+	u8 tcs = netdev_get_num_tc(adapter->netdev);
 
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		return;
@@ -2826,28 +2826,27 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
 	IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
 
 	/* set transmit pool layout */
-	mask = (IXGBE_FLAG_SRIOV_ENABLED | IXGBE_FLAG_DCB_ENABLED);
-	switch (adapter->flags & mask) {
-
+	switch (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
 	case (IXGBE_FLAG_SRIOV_ENABLED):
 		IXGBE_WRITE_REG(hw, IXGBE_MTQC,
 				(IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF));
 		break;
+	default:
+		if (!tcs)
+			reg = IXGBE_MTQC_64Q_1PB;
+		else if (tcs <= 4)
+			reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
+		else
+			reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
 
-	case (IXGBE_FLAG_DCB_ENABLED):
-		/* We enable 8 traffic classes, DCB only */
-		IXGBE_WRITE_REG(hw, IXGBE_MTQC,
-			      (IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ));
-
-		/* Enable Security TX Buffer IFG for DCB */
-		reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
-		reg |= IXGBE_SECTX_DCB;
-		IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
-
-		break;
+		IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
 
-	default:
-		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
+		/* Enable Security TX Buffer IFG for multiple pb */
+		if (tcs) {
+			reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+			reg |= IXGBE_SECTX_DCB;
+			IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+		}
 		break;
 	}
 
@@ -2938,7 +2937,7 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
 	u32 mrqc = 0, reta = 0;
 	u32 rxcsum;
 	int i, j;
-	int mask;
+	u8 tcs = netdev_get_num_tc(adapter->netdev);
 
 	/* Fill out hash function seeds */
 	for (i = 0; i < 10; i++)
@@ -2960,33 +2959,28 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
 	rxcsum |= IXGBE_RXCSUM_PCSD;
 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
 
-	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
-		mask = adapter->flags & IXGBE_FLAG_RSS_ENABLED;
-	else
-		mask = adapter->flags & (IXGBE_FLAG_RSS_ENABLED
-#ifdef CONFIG_IXGBE_DCB
-					 | IXGBE_FLAG_DCB_ENABLED
-#endif
-					 | IXGBE_FLAG_SRIOV_ENABLED
-					);
-
-	switch (mask) {
-#ifdef CONFIG_IXGBE_DCB
-	case (IXGBE_FLAG_DCB_ENABLED | IXGBE_FLAG_RSS_ENABLED):
-		mrqc = IXGBE_MRQC_RTRSS8TCEN;
-		break;
-	case (IXGBE_FLAG_DCB_ENABLED):
-		mrqc = IXGBE_MRQC_RT8TCEN;
-		break;
-#endif /* CONFIG_IXGBE_DCB */
-	case (IXGBE_FLAG_RSS_ENABLED):
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB &&
+	    (adapter->flags & IXGBE_FLAG_RSS_ENABLED)) {
 		mrqc = IXGBE_MRQC_RSSEN;
-		break;
-	case (IXGBE_FLAG_SRIOV_ENABLED):
-		mrqc = IXGBE_MRQC_VMDQEN;
-		break;
-	default:
-		break;
+	} else {
+		int mask = adapter->flags & (IXGBE_FLAG_RSS_ENABLED
+					     | IXGBE_FLAG_SRIOV_ENABLED);
+
+		switch (mask) {
+		case (IXGBE_FLAG_RSS_ENABLED):
+			if (!tcs)
+				mrqc = IXGBE_MRQC_RSSEN;
+			else if (tcs <= 4)
+				mrqc = IXGBE_MRQC_RTRSS4TCEN;
+			else
+				mrqc = IXGBE_MRQC_RTRSS8TCEN;
+			break;
+		case (IXGBE_FLAG_SRIOV_ENABLED):
+			mrqc = IXGBE_MRQC_VMDQEN;
+			break;
+		default:
+			break;
+		}
 	}
 
 	/* Perform hash on these packet types */
@@ -4460,14 +4454,17 @@ static inline bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
 {
 	bool ret = false;
 	struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_DCB];
-	int i, q;
+	int tcs = netdev_get_num_tc(adapter->netdev);
+	int max_q, i, q;
 
-	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !tcs)
 		return ret;
 
+	max_q = adapter->netdev->num_tx_queues / tcs;
+
 	f->indices = 0;
-	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
-		q = min((int)num_online_cpus(), MAX_TRAFFIC_CLASS);
+	for (i = 0; i < tcs; i++) {
+		q = min((int)num_online_cpus(), max_q);
 		f->indices += q;
 	}
 
@@ -4679,55 +4676,6 @@ static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
 	}
 }
 
-#define IXGBE_MAX_Q_PER_TC	(IXGBE_MAX_DCB_INDICES / MAX_TRAFFIC_CLASS)
-
-/* ixgbe_setup_tc - routine to configure net_device for multiple traffic
- * classes.
- *
- * @netdev: net device to configure
- * @tc: number of traffic classes to enable
- */
-int ixgbe_setup_tc(struct net_device *dev, u8 tc)
-{
-	int i;
-	unsigned int q, offset = 0;
-
-	if (!tc) {
-		netdev_reset_tc(dev);
-	} else {
-		struct ixgbe_adapter *adapter = netdev_priv(dev);
-
-		/* Hardware supports up to 8 traffic classes */
-		if (tc > MAX_TRAFFIC_CLASS || netdev_set_num_tc(dev, tc))
-			return -EINVAL;
-
-		/* Partition Tx queues evenly amongst traffic classes */
-		for (i = 0; i < tc; i++) {
-			q = min((int)num_online_cpus(), IXGBE_MAX_Q_PER_TC);
-			netdev_set_prio_tc_map(dev, i, i);
-			netdev_set_tc_queue(dev, i, q, offset);
-			offset += q;
-		}
-
-		/* This enables multiple traffic class support in the hardware
-		 * which defaults to strict priority transmission by default.
-		 * If traffic classes are already enabled perhaps through DCB
-		 * code path then existing configuration will be used.
-		 */
-		if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
-		    dev->dcbnl_ops && dev->dcbnl_ops->setdcbx) {
-			struct ieee_ets ets = {
-					.prio_tc = {0, 1, 2, 3, 4, 5, 6, 7},
-					      };
-			u8 mode = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
-
-			dev->dcbnl_ops->setdcbx(dev, mode);
-			dev->dcbnl_ops->ieee_setets(dev, &ets);
-		}
-	}
-	return 0;
-}
-
 /**
  * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB
  * @adapter: board private structure to initialize
@@ -4741,7 +4689,7 @@ static inline bool ixgbe_cache_ring_dcb(struct ixgbe_adapter *adapter)
 	int i, j, k;
 	u8 num_tcs = netdev_get_num_tc(dev);
 
-	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+	if (!num_tcs)
 		return false;
 
 	for (i = 0, k = 0; i < num_tcs; i++) {
@@ -7219,6 +7167,95 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev,
 	return stats;
 }
 
+/* ixgbe_validate_rtr - verify 802.1Qp to Rx packet buffer mapping is valid.
+ * #adapter: pointer to ixgbe_adapter
+ * @tc: number of traffic classes currently enabled
+ *
+ * Configure a valid 802.1Qp to Rx packet buffer mapping ie confirm
+ * 802.1Q priority maps to a packet buffer that exists.
+ */
+static void ixgbe_validate_rtr(struct ixgbe_adapter *adapter, u8 tc)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg, rsave;
+	int i;
+
+	/* 82598 have a static priority to TC mapping that can not
+	 * be changed so no validation is needed.
+	 */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RTRUP2TC);
+	rsave = reg;
+
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		u8 up2tc = reg >> (i * IXGBE_RTRUP2TC_UP_SHIFT);
+
+		/* If up2tc is out of bounds default to zero */
+		if (up2tc > tc)
+			reg &= ~(0x7 << IXGBE_RTRUP2TC_UP_SHIFT);
+	}
+
+	if (reg != rsave)
+		IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, reg);
+
+	return;
+}
+
+
+/* ixgbe_setup_tc - routine to configure net_device for multiple traffic
+ * classes.
+ *
+ * @netdev: net device to configure
+ * @tc: number of traffic classes to enable
+ */
+int ixgbe_setup_tc(struct net_device *dev, u8 tc)
+{
+	unsigned int q, i, offset = 0;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	int max_q = adapter->netdev->num_tx_queues / tc;
+
+	/* If DCB is anabled do not remove traffic classes, multiple
+	 * traffic classes are required to implement DCB
+	 */
+	if (!tc && (adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+		return 0;
+
+	/* Hardware supports up to 8 traffic classes */
+	if (tc > MAX_TRAFFIC_CLASS ||
+	    (hw->mac.type == ixgbe_mac_82598EB && tc < MAX_TRAFFIC_CLASS))
+		return -EINVAL;
+
+	/* Hardware has to reinitialize queues and interrupts to
+	 * match packet buffer alignment. Unfortunantly, the
+	 * hardware is not flexible enough to do this dynamically.
+	 */
+	if (netif_running(dev))
+		ixgbe_close(dev);
+	ixgbe_clear_interrupt_scheme(adapter);
+
+	if (tc)
+		netdev_set_num_tc(dev, tc);
+	else
+		netdev_reset_tc(dev);
+
+	/* Partition Tx queues evenly amongst traffic classes */
+	for (i = 0; i < tc; i++) {
+		q = min((int)num_online_cpus(), max_q);
+		netdev_set_prio_tc_map(dev, i, i);
+		netdev_set_tc_queue(dev, i, q, offset);
+		offset += q;
+	}
+
+	ixgbe_init_interrupt_scheme(adapter);
+	ixgbe_validate_rtr(adapter, tc);
+	if (netif_running(dev))
+		ixgbe_open(dev);
+
+	return 0;
+}
 
 static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open		= ixgbe_open,
@@ -7239,9 +7276,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_set_vf_tx_rate	= ixgbe_ndo_set_vf_bw,
 	.ndo_get_vf_config	= ixgbe_ndo_get_vf_config,
 	.ndo_get_stats64	= ixgbe_get_stats64,
-#ifdef CONFIG_IXGBE_DCB
 	.ndo_setup_tc		= ixgbe_setup_tc,
-#endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= ixgbe_netpoll,
 #endif
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index c0849a6..5455064 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -1881,6 +1881,7 @@ enum {
 #define IXGBE_MTQC_32VF         0x8 /* 4 TX Queues per pool w/32VF's */
 #define IXGBE_MTQC_64VF         0x4 /* 2 TX Queues per pool w/64VF's */
 #define IXGBE_MTQC_8TC_8TQ      0xC /* 8 TC if RT_ENA or 8 TQ if VT_ENA */
+#define IXGBE_MTQC_4TC_4TQ	0x8 /* 4 TC if RT_ENA or 4 TQ if VT_ENA */
 
 /* Receive Descriptor bit definitions */
 #define IXGBE_RXD_STAT_DD       0x01    /* Descriptor Done */
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 13/40] ixgbe: consolidate MRQC and MTQC handling
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: John Fastabend, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

The MRQC and MTQC registers are configured in the main
setup path but are also reconfigured in the DCB setup
path. The DCB path fixes the DCB configuration by configuring
the SECTXMINIFG gap which is required for DCB pause
to operate correctly.

This patch reduces the duplicate code and does all setup
in ixgbe_setup_mtqc() and ixgbe_setup_mrqc().

Additionally, this removes the IXGBE_QDE. This write never
set the WRITE bit in the register so the write was not
actually doing anything. Also this was to clear the register
but, it is never set and defaults to zero. If this is
needed for SRIOV it should be added correctly in a follow
up patch. But it's never been working so removing it here
should be OK.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_dcb_82599.c |   57 -----------------------------------
 drivers/net/ixgbe/ixgbe_main.c      |    7 ++++
 2 files changed, 7 insertions(+), 57 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ixgbe/ixgbe_dcb_82599.c
index befe8ad..ade9820 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_82599.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_82599.c
@@ -319,62 +319,6 @@ static s32 ixgbe_dcb_config_tc_stats_82599(struct ixgbe_hw *hw)
 }
 
 /**
- * ixgbe_dcb_config_82599 - Configure general DCB parameters
- * @hw: pointer to hardware structure
- *
- * Configure general DCB parameters.
- */
-static s32 ixgbe_dcb_config_82599(struct ixgbe_hw *hw)
-{
-	u32 reg;
-	u32 q;
-
-	/* Disable the Tx desc arbiter so that MTQC can be changed */
-	reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
-	reg |= IXGBE_RTTDCS_ARBDIS;
-	IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
-
-	/* Enable DCB for Rx with 8 TCs */
-	reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
-	switch (reg & IXGBE_MRQC_MRQE_MASK) {
-	case 0:
-	case IXGBE_MRQC_RT4TCEN:
-		/* RSS disabled cases */
-		reg = (reg & ~IXGBE_MRQC_MRQE_MASK) | IXGBE_MRQC_RT8TCEN;
-		break;
-	case IXGBE_MRQC_RSSEN:
-	case IXGBE_MRQC_RTRSS4TCEN:
-		/* RSS enabled cases */
-		reg = (reg & ~IXGBE_MRQC_MRQE_MASK) | IXGBE_MRQC_RTRSS8TCEN;
-		break;
-	default:
-		/* Unsupported value, assume stale data, overwrite no RSS */
-		reg = (reg & ~IXGBE_MRQC_MRQE_MASK) | IXGBE_MRQC_RT8TCEN;
-	}
-	IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
-
-	/* Enable DCB for Tx with 8 TCs */
-	reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
-	IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
-
-	/* Disable drop for all queues */
-	for (q = 0; q < 128; q++)
-		IXGBE_WRITE_REG(hw, IXGBE_QDE, q << IXGBE_QDE_IDX_SHIFT);
-
-	/* Enable the Tx desc arbiter */
-	reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
-	reg &= ~IXGBE_RTTDCS_ARBDIS;
-	IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
-
-	/* Enable Security TX Buffer IFG for DCB */
-	reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
-	reg |= IXGBE_SECTX_DCB;
-	IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
-
-	return 0;
-}
-
-/**
  * ixgbe_dcb_hw_config_82599 - Configure and enable DCB
  * @hw: pointer to hardware structure
  * @refill: refill credits index by traffic class
@@ -388,7 +332,6 @@ static s32 ixgbe_dcb_config_82599(struct ixgbe_hw *hw)
 s32 ixgbe_dcb_hw_config_82599(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
 			      u16 *max, u8 *bwg_id, u8 *prio_type, u8 *prio_tc)
 {
-	ixgbe_dcb_config_82599(hw);
 	ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
 					  prio_type, prio_tc);
 	ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max,
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index ff813eb..66fc94a 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2815,6 +2815,7 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 rttdcs;
 	u32 mask;
+	u32 reg;
 
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		return;
@@ -2837,6 +2838,12 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
 		/* We enable 8 traffic classes, DCB only */
 		IXGBE_WRITE_REG(hw, IXGBE_MTQC,
 			      (IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ));
+
+		/* Enable Security TX Buffer IFG for DCB */
+		reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+		reg |= IXGBE_SECTX_DCB;
+		IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
 		break;
 
 	default:
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 15/40] ixgbe: DCB use existing TX and RX queues
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: John Fastabend, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

The number of TX and RX queues allocated depends on the device
type, the current features set, online CPUs, and various
compile flags.

To enable DCB with multiple queues and allow it to coexist with
all the features currently implemented it has to setup a valid
queue count. This is done at init time using the FDIR and RSS
max queue counts and allowing each TC to allocate a queue per
CPU.

DCB will now use available queues up to (8 x TCs) this is somewhat
arbitrary cap but allows DCB to use up to 64 queues. Its easy to
increase this later if that is needed.

This is prep work to enable Flow Director with DCB. After this
DCB can easily coexist with existing features and no longer
needs its own DCB feature ring.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe.h      |    2 -
 drivers/net/ixgbe/ixgbe_main.c |  107 ++++++++++++++++++---------------------
 2 files changed, 49 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index e467b20..d5674fc 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -244,7 +244,6 @@ struct ixgbe_ring {
 
 enum ixgbe_ring_f_enum {
 	RING_F_NONE = 0,
-	RING_F_DCB,
 	RING_F_VMDQ,  /* SR-IOV uses the same ring feature */
 	RING_F_RSS,
 	RING_F_FDIR,
@@ -255,7 +254,6 @@ enum ixgbe_ring_f_enum {
 	RING_F_ARRAY_SIZE      /* must be last in enum set */
 };
 
-#define IXGBE_MAX_DCB_INDICES  64
 #define IXGBE_MAX_RSS_INDICES  16
 #define IXGBE_MAX_VMDQ_INDICES 64
 #define IXGBE_MAX_FDIR_INDICES 64
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index f6d8105..747da66 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4416,72 +4416,72 @@ static inline bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter)
 	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
 		return false;
 
-	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
-#ifdef CONFIG_IXGBE_DCB
-		int tc;
-		struct net_device *dev = adapter->netdev;
+	f->indices = min((int)num_online_cpus(), f->indices);
 
-		tc = netdev_get_prio_tc_map(dev, adapter->fcoe.up);
-		f->indices = dev->tc_to_txq[tc].count;
-		f->mask = dev->tc_to_txq[tc].offset;
-#endif
-	} else {
-		f->indices = min((int)num_online_cpus(), f->indices);
-
-		adapter->num_rx_queues = 1;
-		adapter->num_tx_queues = 1;
+	adapter->num_rx_queues = 1;
+	adapter->num_tx_queues = 1;
 
-		if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
-			e_info(probe, "FCoE enabled with RSS\n");
-			if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
-			    (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
-				ixgbe_set_fdir_queues(adapter);
-			else
-				ixgbe_set_rss_queues(adapter);
-		}
-		/* adding FCoE rx rings to the end */
-		f->mask = adapter->num_rx_queues;
-		adapter->num_rx_queues += f->indices;
-		adapter->num_tx_queues += f->indices;
+	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
+		e_info(probe, "FCoE enabled with RSS\n");
+		if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
+		    (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+			ixgbe_set_fdir_queues(adapter);
+		else
+			ixgbe_set_rss_queues(adapter);
 	}
+	/* adding FCoE rx rings to the end */
+	f->mask = adapter->num_rx_queues;
+	adapter->num_rx_queues += f->indices;
+	adapter->num_tx_queues += f->indices;
 
 	return true;
 }
 #endif /* IXGBE_FCOE */
 
+/* Artificial max queue cap per traffic class in DCB mode */
+#define DCB_QUEUE_CAP 8
+
 #ifdef CONFIG_IXGBE_DCB
 static inline bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
 {
-	bool ret = false;
-	struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_DCB];
-	int tcs = netdev_get_num_tc(adapter->netdev);
-	int max_q, i, q;
+	int per_tc_q, q, i, offset = 0;
+	struct net_device *dev = adapter->netdev;
+	int tcs = netdev_get_num_tc(dev);
 
-	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !tcs)
-		return ret;
+	if (!tcs)
+		return false;
 
-	max_q = adapter->netdev->num_tx_queues / tcs;
+	/* Map queue offset and counts onto allocated tx queues */
+	per_tc_q = min(dev->num_tx_queues / tcs, (unsigned int)DCB_QUEUE_CAP);
+	q = min((int)num_online_cpus(), per_tc_q);
 
-	f->indices = 0;
 	for (i = 0; i < tcs; i++) {
-		q = min((int)num_online_cpus(), max_q);
-		f->indices += q;
+		netdev_set_prio_tc_map(dev, i, i);
+		netdev_set_tc_queue(dev, i, q, offset);
+		offset += q;
 	}
 
-	f->mask = 0x7 << 3;
-	adapter->num_rx_queues = f->indices;
-	adapter->num_tx_queues = f->indices;
-	ret = true;
+	adapter->num_tx_queues = q * tcs;
+	adapter->num_rx_queues = q * tcs;
 
 #ifdef IXGBE_FCOE
-	/* FCoE enabled queues require special configuration done through
-	 * configure_fcoe() and others. Here we map FCoE indices onto the
-	 * DCB queue pairs allowing FCoE to own configuration later.
+	/* FCoE enabled queues require special configuration indexed
+	 * by feature specific indices and mask. Here we map FCoE
+	 * indices onto the DCB queue pairs allowing FCoE to own
+	 * configuration later.
 	 */
-	ixgbe_set_fcoe_queues(adapter);
+	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+		int tc;
+		struct ixgbe_ring_feature *f =
+					&adapter->ring_feature[RING_F_FCOE];
+
+		tc = netdev_get_prio_tc_map(dev, adapter->fcoe.up);
+		f->indices = dev->tc_to_txq[tc].count;
+		f->mask = dev->tc_to_txq[tc].offset;
+	}
 #endif
 
-	return ret;
+	return true;
 }
 #endif
 
@@ -5171,7 +5171,6 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	rss = min(IXGBE_MAX_RSS_INDICES, (int)num_online_cpus());
 	adapter->ring_feature[RING_F_RSS].indices = rss;
 	adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
-	adapter->ring_feature[RING_F_DCB].indices = IXGBE_MAX_DCB_INDICES;
 	switch (hw->mac.type) {
 	case ixgbe_mac_82598EB:
 		if (hw->device_id == IXGBE_DEV_ID_82598AT)
@@ -7212,10 +7211,8 @@ static void ixgbe_validate_rtr(struct ixgbe_adapter *adapter, u8 tc)
  */
 int ixgbe_setup_tc(struct net_device *dev, u8 tc)
 {
-	unsigned int q, i, offset = 0;
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_hw *hw = &adapter->hw;
-	int max_q = adapter->netdev->num_tx_queues / tc;
 
 	/* If DCB is anabled do not remove traffic classes, multiple
 	 * traffic classes are required to implement DCB
@@ -7241,14 +7238,6 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
 	else
 		netdev_reset_tc(dev);
 
-	/* Partition Tx queues evenly amongst traffic classes */
-	for (i = 0; i < tc; i++) {
-		q = min((int)num_online_cpus(), max_q);
-		netdev_set_prio_tc_map(dev, i, i);
-		netdev_set_tc_queue(dev, i, q, offset);
-		offset += q;
-	}
-
 	ixgbe_init_interrupt_scheme(adapter);
 	ixgbe_validate_rtr(adapter, tc);
 	if (netif_running(dev))
@@ -7435,14 +7424,16 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);
 	pci_save_state(pdev);
 
+#ifdef CONFIG_IXGBE_DCB
+	indices *= MAX_TRAFFIC_CLASS;
+#endif
+
 	if (ii->mac == ixgbe_mac_82598EB)
 		indices = min_t(unsigned int, indices, IXGBE_MAX_RSS_INDICES);
 	else
 		indices = min_t(unsigned int, indices, IXGBE_MAX_FDIR_INDICES);
 
-#if defined(CONFIG_DCB)
-	indices = max_t(unsigned int, indices, IXGBE_MAX_DCB_INDICES);
-#elif defined(IXGBE_FCOE)
+#ifdef IXGBE_FCOE
 	indices += min_t(unsigned int, num_possible_cpus(),
 			 IXGBE_MAX_FCOE_INDICES);
 #endif
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 17/40] ixgbe: setup redirection table for multiple packet buffers
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: John Fastabend, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

Setup RSS redirection table to be compatible with multiple packet
buffers. Currently, this works on 82599 devices because the RSS
redirection index is masked by the number of queues per packet
buffer.

This sets the cap on the RSS table to maxq.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_main.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 111c3f9..2634c67 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2938,6 +2938,10 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
 	u32 rxcsum;
 	int i, j;
 	u8 tcs = netdev_get_num_tc(adapter->netdev);
+	int maxq = adapter->ring_feature[RING_F_RSS].indices;
+
+	if (tcs)
+		maxq = min(maxq, adapter->num_tx_queues / tcs);
 
 	/* Fill out hash function seeds */
 	for (i = 0; i < 10; i++)
@@ -2945,7 +2949,7 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
 
 	/* Fill out redirection table */
 	for (i = 0, j = 0; i < 128; i++, j++) {
-		if (j == adapter->ring_feature[RING_F_RSS].indices)
+		if (j == maxq)
 			j = 0;
 		/* reta = 4-byte sliding window of
 		 * 0x00..(indices-1)(indices-1)00..etc. */
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 16/40] ixgbe: DCB 82598 devices, tx_idx and rx_idx swapped
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: John Fastabend, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

The tx_idx and rx_idx values are swapped on 82598 devices
with DCB enabled.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_main.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 747da66..111c3f9 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4635,8 +4635,8 @@ static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
 
 	switch (hw->mac.type) {
 	case ixgbe_mac_82598EB:
-		*tx = tc << 3;
-		*rx = tc << 2;
+		*tx = tc << 2;
+		*rx = tc << 3;
 		break;
 	case ixgbe_mac_82599EB:
 	case ixgbe_mac_X540:
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 19/40] ixgbe: DCB and perfect filters can coexist
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: John Fastabend, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

Now flow directors perfect filters features can coexist with DCB.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_dcb_nl.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index b229feb..08c7aeb 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -135,7 +135,6 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 		case ixgbe_mac_82599EB:
 		case ixgbe_mac_X540:
 			adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
-			adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
 			break;
 		default:
 			break;
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 18/40] ixgbe: fix bit mask for DCB version
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: John Fastabend, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

This bit mask is wrong DCBX_HOST is always set. It was missed up
until now because lldpad reprograms the device on a link
event. However this is still wrong and it is best not to be
mis-configured for some time immediately following ixgbe_up().

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_main.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 2634c67..fad1803 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3744,7 +3744,7 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
 	hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true);
 
 	/* reconfigure the hardware */
-	if (adapter->dcbx_cap & (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE)) {
+	if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE) {
 #ifdef CONFIG_FCOE
 		if (adapter->netdev->features & NETIF_F_FCOE_MTU)
 			max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE);
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 20/40] ixgbe: DCB, remove unneeded ixgbe_dcb_txq_to_tc() routine
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: John Fastabend, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

The ixgbe_dcb_txq_to_tc() routine was used to map TX rings to
a DCB traffic class. Now that a tx_ring has a DCB traffic class
associated with it this routine is no longer needed.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_main.c |   58 +---------------------------------------
 1 files changed, 1 insertions(+), 57 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index fad1803..89ba538 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -664,62 +664,6 @@ void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *tx_ring,
 	/* tx_buffer_info must be completely set up in the transmit path */
 }
 
-/**
- * ixgbe_dcb_txq_to_tc - convert a reg index to a traffic class
- * @adapter: driver private struct
- * @index: reg idx of queue to query (0-127)
- *
- * Helper function to determine the traffic index for a particular
- * register index.
- *
- * Returns : a tc index for use in range 0-7, or 0-3
- */
-static u8 ixgbe_dcb_txq_to_tc(struct ixgbe_adapter *adapter, u8 reg_idx)
-{
-	int tc = -1;
-	int dcb_i = netdev_get_num_tc(adapter->netdev);
-
-	/* if DCB is not enabled the queues have no TC */
-	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
-		return tc;
-
-	/* check valid range */
-	if (reg_idx >= adapter->hw.mac.max_tx_queues)
-		return tc;
-
-	switch (adapter->hw.mac.type) {
-	case ixgbe_mac_82598EB:
-		tc = reg_idx >> 2;
-		break;
-	default:
-		if (dcb_i != 4 && dcb_i != 8)
-			break;
-
-		/* if VMDq is enabled the lowest order bits determine TC */
-		if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED |
-				      IXGBE_FLAG_VMDQ_ENABLED)) {
-			tc = reg_idx & (dcb_i - 1);
-			break;
-		}
-
-		/*
-		 * Convert the reg_idx into the correct TC. This bitmask
-		 * targets the last full 32 ring traffic class and assigns
-		 * it a value of 1. From there the rest of the rings are
-		 * based on shifting the mask further up to include the
-		 * reg_idx / 16 and then reg_idx / 8. It assumes dcB_i
-		 * will only ever be 8 or 4 and that reg_idx will never
-		 * be greater then 128. The code without the power of 2
-		 * optimizations would be:
-		 * (((reg_idx % 32) + 32) * dcb_i) >> (9 - reg_idx / 32)
-		 */
-		tc = ((reg_idx & 0X1F) + 0x20) * dcb_i;
-		tc >>= 9 - (reg_idx >> 5);
-	}
-
-	return tc;
-}
-
 static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -765,7 +709,7 @@ static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
 	/* disarm tx queues that have received xoff frames */
 	for (i = 0; i < adapter->num_tx_queues; i++) {
 		struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
-		u32 tc = ixgbe_dcb_txq_to_tc(adapter, tx_ring->reg_idx);
+		u8 tc = tx_ring->dcb_tc;
 
 		if (xoff[tc])
 			clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 22/40] ixgbe: setup per CPU PCI pool for FCoE DDP
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: Vasu Dev, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Vasu Dev <vasu.dev@intel.com>

Currently single PCI pool used across all CPUs and that
doesn't scales up as number of CPU increases, so this
patch adds per CPU PCI pool to setup udl and that aligns
well from FCoE stack as that already has per CPU exch locking.

Adds per CPU PCI alloc setup and free in
ixgbe_fcoe_ddp_pools_alloc and ixgbe_fcoe_ddp_pools_free,
use CPU specific pool during DDP setup.

Re-arranged ixgbe_fcoe struct to have fewer holes
along with adding pools ptr using pahole.

Signed-off-by: Vasu Dev <vasu.dev@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_fcoe.c |  105 ++++++++++++++++++++++++++++-----------
 drivers/net/ixgbe/ixgbe_fcoe.h |   13 +++--
 2 files changed, 82 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c
index 0592072..f5f39ed 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ixgbe/ixgbe_fcoe.c
@@ -128,7 +128,11 @@ int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid)
 	if (ddp->sgl)
 		pci_unmap_sg(adapter->pdev, ddp->sgl, ddp->sgc,
 			     DMA_FROM_DEVICE);
-	pci_pool_free(fcoe->pool, ddp->udl, ddp->udp);
+	if (ddp->pool) {
+		pci_pool_free(ddp->pool, ddp->udl, ddp->udp);
+		ddp->pool = NULL;
+	}
+
 	ixgbe_fcoe_clear_ddp(ddp);
 
 out_ddp_put:
@@ -163,6 +167,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	unsigned int thislen = 0;
 	u32 fcbuff, fcdmarw, fcfltrw, fcrxctl;
 	dma_addr_t addr = 0;
+	struct pci_pool *pool;
 
 	if (!netdev || !sgl)
 		return 0;
@@ -199,12 +204,14 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 		return 0;
 	}
 
-	/* alloc the udl from our ddp pool */
-	ddp->udl = pci_pool_alloc(fcoe->pool, GFP_ATOMIC, &ddp->udp);
+	/* alloc the udl from per cpu ddp pool */
+	pool = *per_cpu_ptr(fcoe->pool, get_cpu());
+	ddp->udl = pci_pool_alloc(pool, GFP_ATOMIC, &ddp->udp);
 	if (!ddp->udl) {
 		e_err(drv, "failed allocated ddp context\n");
 		goto out_noddp_unmap;
 	}
+	ddp->pool = pool;
 	ddp->sgl = sgl;
 	ddp->sgc = sgc;
 
@@ -268,6 +275,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 		j++;
 		lastsize = 1;
 	}
+	put_cpu();
 
 	fcbuff = (IXGBE_FCBUFF_4KB << IXGBE_FCBUFF_BUFFSIZE_SHIFT);
 	fcbuff |= ((j & 0xff) << IXGBE_FCBUFF_BUFFCNT_SHIFT);
@@ -311,11 +319,12 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	return 1;
 
 out_noddp_free:
-	pci_pool_free(fcoe->pool, ddp->udl, ddp->udp);
+	pci_pool_free(pool, ddp->udl, ddp->udp);
 	ixgbe_fcoe_clear_ddp(ddp);
 
 out_noddp_unmap:
 	pci_unmap_sg(adapter->pdev, sgl, sgc, DMA_FROM_DEVICE);
+	put_cpu();
 	return 0;
 }
 
@@ -585,6 +594,46 @@ int ixgbe_fso(struct ixgbe_adapter *adapter,
 	return skb_is_gso(skb);
 }
 
+static void ixgbe_fcoe_ddp_pools_free(struct ixgbe_fcoe *fcoe)
+{
+	unsigned int cpu;
+	struct pci_pool **pool;
+
+	for_each_possible_cpu(cpu) {
+		pool = per_cpu_ptr(fcoe->pool, cpu);
+		if (*pool)
+			pci_pool_destroy(*pool);
+	}
+	free_percpu(fcoe->pool);
+	fcoe->pool = NULL;
+}
+
+static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+	unsigned int cpu;
+	struct pci_pool **pool;
+	char pool_name[32];
+
+	fcoe->pool = alloc_percpu(struct pci_pool *);
+	if (!fcoe->pool)
+		return;
+
+	/* allocate pci pool for each cpu */
+	for_each_possible_cpu(cpu) {
+		snprintf(pool_name, 32, "ixgbe_fcoe_ddp_%d", cpu);
+		pool = per_cpu_ptr(fcoe->pool, cpu);
+		*pool = pci_pool_create(pool_name,
+					adapter->pdev, IXGBE_FCPTR_MAX,
+					IXGBE_FCPTR_ALIGN, PAGE_SIZE);
+		if (!*pool) {
+			e_err(drv, "failed to alloc DDP pool on cpu:%d\n", cpu);
+			ixgbe_fcoe_ddp_pools_free(fcoe);
+			return;
+		}
+	}
+}
+
 /**
  * ixgbe_configure_fcoe - configures registers for fcoe at start
  * @adapter: ptr to ixgbe adapter
@@ -604,22 +653,20 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 	u32 up2tc;
 #endif
 
-	/* create the pool for ddp if not created yet */
 	if (!fcoe->pool) {
-		/* allocate ddp pool */
-		fcoe->pool = pci_pool_create("ixgbe_fcoe_ddp",
-					     adapter->pdev, IXGBE_FCPTR_MAX,
-					     IXGBE_FCPTR_ALIGN, PAGE_SIZE);
-		if (!fcoe->pool)
-			e_err(drv, "failed to allocated FCoE DDP pool\n");
-
 		spin_lock_init(&fcoe->lock);
 
+		ixgbe_fcoe_ddp_pools_alloc(adapter);
+		if (!fcoe->pool) {
+			e_err(drv, "failed to alloc percpu fcoe DDP pools\n");
+			return;
+		}
+
 		/* Extra buffer to be shared by all DDPs for HW work around */
 		fcoe->extra_ddp_buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_ATOMIC);
 		if (fcoe->extra_ddp_buffer == NULL) {
 			e_err(drv, "failed to allocated extra DDP buffer\n");
-			goto out_extra_ddp_buffer_alloc;
+			goto out_ddp_pools;
 		}
 
 		fcoe->extra_ddp_buffer_dma =
@@ -630,7 +677,7 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 		if (dma_mapping_error(&adapter->pdev->dev,
 				      fcoe->extra_ddp_buffer_dma)) {
 			e_err(drv, "failed to map extra DDP buffer\n");
-			goto out_extra_ddp_buffer_dma;
+			goto out_extra_ddp_buffer;
 		}
 	}
 
@@ -684,11 +731,10 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 
 	return;
 
-out_extra_ddp_buffer_dma:
+out_extra_ddp_buffer:
 	kfree(fcoe->extra_ddp_buffer);
-out_extra_ddp_buffer_alloc:
-	pci_pool_destroy(fcoe->pool);
-	fcoe->pool = NULL;
+out_ddp_pools:
+	ixgbe_fcoe_ddp_pools_free(fcoe);
 }
 
 /**
@@ -704,18 +750,17 @@ void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter)
 	int i;
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
 
-	/* release ddp resource */
-	if (fcoe->pool) {
-		for (i = 0; i < IXGBE_FCOE_DDP_MAX; i++)
-			ixgbe_fcoe_ddp_put(adapter->netdev, i);
-		dma_unmap_single(&adapter->pdev->dev,
-				 fcoe->extra_ddp_buffer_dma,
-				 IXGBE_FCBUFF_MIN,
-				 DMA_FROM_DEVICE);
-		kfree(fcoe->extra_ddp_buffer);
-		pci_pool_destroy(fcoe->pool);
-		fcoe->pool = NULL;
-	}
+	if (!fcoe->pool)
+		return;
+
+	for (i = 0; i < IXGBE_FCOE_DDP_MAX; i++)
+		ixgbe_fcoe_ddp_put(adapter->netdev, i);
+	dma_unmap_single(&adapter->pdev->dev,
+			 fcoe->extra_ddp_buffer_dma,
+			 IXGBE_FCBUFF_MIN,
+			 DMA_FROM_DEVICE);
+	kfree(fcoe->extra_ddp_buffer);
+	ixgbe_fcoe_ddp_pools_free(fcoe);
 }
 
 /**
diff --git a/drivers/net/ixgbe/ixgbe_fcoe.h b/drivers/net/ixgbe/ixgbe_fcoe.h
index 5a650a4..d876e7a 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ixgbe/ixgbe_fcoe.h
@@ -62,20 +62,21 @@ struct ixgbe_fcoe_ddp {
 	struct scatterlist *sgl;
 	dma_addr_t udp;
 	u64 *udl;
+	struct pci_pool *pool;
 };
 
 struct ixgbe_fcoe {
-#ifdef CONFIG_IXGBE_DCB
-	u8 tc;
-	u8 up;
-#endif
-	unsigned long mode;
+	struct pci_pool **pool;
 	atomic_t refcnt;
 	spinlock_t lock;
-	struct pci_pool *pool;
 	struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
 	unsigned char *extra_ddp_buffer;
 	dma_addr_t extra_ddp_buffer_dma;
+	unsigned long mode;
+#ifdef CONFIG_IXGBE_DCB
+	u8 tc;
+	u8 up;
+#endif
 };
 
 #endif /* _IXGBE_FCOE_H */
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 21/40] ixgbe: add support for Dell CEM
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

This patch adds support for Dell CEM (Comprehensive Embedded Management)).
This consists of informing the management firmware of the driver version
during probe on 82599 and X540 HW.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_82598.c  |    1 +
 drivers/net/ixgbe/ixgbe_82599.c  |    1 +
 drivers/net/ixgbe/ixgbe_common.c |  174 ++++++++++++++++++++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_common.h |    2 +
 drivers/net/ixgbe/ixgbe_main.c   |    4 +
 drivers/net/ixgbe/ixgbe_type.h   |   46 ++++++++++
 drivers/net/ixgbe/ixgbe_x540.c   |    1 +
 7 files changed, 229 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_82598.c b/drivers/net/ixgbe/ixgbe_82598.c
index bb417d7..0d4e382 100644
--- a/drivers/net/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ixgbe/ixgbe_82598.c
@@ -1316,6 +1316,7 @@ static struct ixgbe_mac_operations mac_ops_82598 = {
 	.clear_vfta		= &ixgbe_clear_vfta_82598,
 	.set_vfta		= &ixgbe_set_vfta_82598,
 	.fc_enable		= &ixgbe_fc_enable_82598,
+	.set_fw_drv_ver         = NULL,
 	.acquire_swfw_sync      = &ixgbe_acquire_swfw_sync,
 	.release_swfw_sync      = &ixgbe_release_swfw_sync,
 };
diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index 324a505..4a6826b 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -2126,6 +2126,7 @@ static struct ixgbe_mac_operations mac_ops_82599 = {
 	.clear_vfta             = &ixgbe_clear_vfta_generic,
 	.set_vfta               = &ixgbe_set_vfta_generic,
 	.fc_enable              = &ixgbe_fc_enable_generic,
+	.set_fw_drv_ver         = &ixgbe_set_fw_drv_ver_generic,
 	.init_uta_tables        = &ixgbe_init_uta_tables_generic,
 	.setup_sfp              = &ixgbe_setup_sfp_modules_82599,
 	.set_mac_anti_spoofing  = &ixgbe_set_mac_anti_spoofing,
diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c
index cc2a4a1..777051f 100644
--- a/drivers/net/ixgbe/ixgbe_common.c
+++ b/drivers/net/ixgbe/ixgbe_common.c
@@ -3333,3 +3333,177 @@ void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw,
 		IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
 	}
 }
+
+/**
+ *  ixgbe_calculate_checksum - Calculate checksum for buffer
+ *  @buffer: pointer to EEPROM
+ *  @length: size of EEPROM to calculate a checksum for
+ *  Calculates the checksum for some buffer on a specified length.  The
+ *  checksum calculated is returned.
+ **/
+static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
+{
+	u32 i;
+	u8 sum = 0;
+
+	if (!buffer)
+		return 0;
+
+	for (i = 0; i < length; i++)
+		sum += buffer[i];
+
+	return (u8) (0 - sum);
+}
+
+/**
+ *  ixgbe_host_interface_command - Issue command to manageability block
+ *  @hw: pointer to the HW structure
+ *  @buffer: contains the command to write and where the return status will
+ *           be placed
+ *  @lenght: lenght of buffer, must be multiple of 4 bytes
+ *
+ *  Communicates with the manageability block.  On success return 0
+ *  else return IXGBE_ERR_HOST_INTERFACE_COMMAND.
+ **/
+static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u8 *buffer,
+					u32 length)
+{
+	u32 hicr, i;
+	u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
+	u8 buf_len, dword_len;
+
+	s32 ret_val = 0;
+
+	if (length == 0 || length & 0x3 ||
+	    length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+		hw_dbg(hw, "Buffer length failure.\n");
+		ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		goto out;
+	}
+
+	/* Check that the host interface is enabled. */
+	hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
+	if ((hicr & IXGBE_HICR_EN) == 0) {
+		hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n");
+		ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		goto out;
+	}
+
+	/* Calculate length in DWORDs */
+	dword_len = length >> 2;
+
+	/*
+	 * The device driver writes the relevant command block
+	 * into the ram area.
+	 */
+	for (i = 0; i < dword_len; i++)
+		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG,
+				      i, *((u32 *)buffer + i));
+
+	/* Setting this bit tells the ARC that a new command is pending. */
+	IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C);
+
+	for (i = 0; i < IXGBE_HI_COMMAND_TIMEOUT; i++) {
+		hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
+		if (!(hicr & IXGBE_HICR_C))
+			break;
+		usleep_range(1000, 2000);
+	}
+
+	/* Check command successful completion. */
+	if (i == IXGBE_HI_COMMAND_TIMEOUT ||
+	    (!(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV))) {
+		hw_dbg(hw, "Command has failed with no status valid.\n");
+		ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		goto out;
+	}
+
+	/* Calculate length in DWORDs */
+	dword_len = hdr_size >> 2;
+
+	/* first pull in the header so we know the buffer length */
+	for (i = 0; i < dword_len; i++)
+		*((u32 *)buffer + i) =
+			IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
+
+	/* If there is any thing in data position pull it in */
+	buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len;
+	if (buf_len == 0)
+		goto out;
+
+	if (length < (buf_len + hdr_size)) {
+		hw_dbg(hw, "Buffer not large enough for reply message.\n");
+		ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		goto out;
+	}
+
+	/* Calculate length in DWORDs, add one for odd lengths */
+	dword_len = (buf_len + 1) >> 2;
+
+	/* Pull in the rest of the buffer (i is where we left off)*/
+	for (; i < buf_len; i++)
+		*((u32 *)buffer + i) =
+			IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, i);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_set_fw_drv_ver_generic - Sends driver version to firmware
+ *  @hw: pointer to the HW structure
+ *  @maj: driver version major number
+ *  @min: driver version minor number
+ *  @build: driver version build number
+ *  @sub: driver version sub build number
+ *
+ *  Sends driver version number to firmware through the manageability
+ *  block.  On success return 0
+ *  else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring
+ *  semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
+ **/
+s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+				 u8 build, u8 sub)
+{
+	struct ixgbe_hic_drv_info fw_cmd;
+	int i;
+	s32 ret_val = 0;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM) != 0) {
+		ret_val = IXGBE_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
+	fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
+	fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+	fw_cmd.port_num = (u8)hw->bus.func;
+	fw_cmd.ver_maj = maj;
+	fw_cmd.ver_min = min;
+	fw_cmd.ver_build = build;
+	fw_cmd.ver_sub = sub;
+	fw_cmd.hdr.checksum = 0;
+	fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+				(FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
+	fw_cmd.pad = 0;
+	fw_cmd.pad2 = 0;
+
+	for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+		ret_val = ixgbe_host_interface_command(hw, (u8 *)&fw_cmd,
+						       sizeof(fw_cmd));
+		if (ret_val != 0)
+			continue;
+
+		if (fw_cmd.hdr.cmd_or_resp.ret_status ==
+		    FW_CEM_RESP_STATUS_SUCCESS)
+			ret_val = 0;
+		else
+			ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+
+		break;
+	}
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+out:
+	return ret_val;
+}
diff --git a/drivers/net/ixgbe/ixgbe_common.h b/drivers/net/ixgbe/ixgbe_common.h
index 32a454f..f24fd64 100644
--- a/drivers/net/ixgbe/ixgbe_common.h
+++ b/drivers/net/ixgbe/ixgbe_common.h
@@ -99,6 +99,8 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index);
 void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf);
 void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf);
 s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps);
+s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+				 u8 build, u8 ver);
 
 void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb,
 			     u32 headroom, int strategy);
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 89ba538..053c561 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -7673,6 +7673,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 			ixgbe_vf_configuration(pdev, (i | 0x10000000));
 	}
 
+	/* Inform firmware of driver version */
+	if (hw->mac.ops.set_fw_drv_ver)
+		hw->mac.ops.set_fw_drv_ver(hw, MAJ, MIN, BUILD, KFIX);
+
 	/* add san mac addr to netdev */
 	ixgbe_add_sanmac_netdev(netdev);
 
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index 5455064..9a499a6 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -707,6 +707,13 @@
 #define IXGBE_HFDR      0x15FE8
 #define IXGBE_FLEX_MNG  0x15800 /* 0x15800 - 0x15EFC */
 
+#define IXGBE_HICR_EN              0x01  /* Enable bit - RO */
+/* Driver sets this bit when done to put command in RAM */
+#define IXGBE_HICR_C               0x02
+#define IXGBE_HICR_SV              0x04  /* Status Validity */
+#define IXGBE_HICR_FW_RESET_ENABLE 0x40
+#define IXGBE_HICR_FW_RESET        0x80
+
 /* PCI-E registers */
 #define IXGBE_GCR       0x11000
 #define IXGBE_GTV       0x11004
@@ -2124,6 +2131,41 @@ enum ixgbe_fdir_pballoc_type {
 #define IXGBE_FDIR_INIT_DONE_POLL               10
 #define IXGBE_FDIRCMD_CMD_POLL                  10
 
+/* Manageablility Host Interface defines */
+#define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH       1792 /* Num of bytes in range */
+#define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH      448 /* Num of dwords in range */
+#define IXGBE_HI_COMMAND_TIMEOUT             500 /* Process HI command limit */
+
+/* CEM Support */
+#define FW_CEM_HDR_LEN                0x4
+#define FW_CEM_CMD_DRIVER_INFO        0xDD
+#define FW_CEM_CMD_DRIVER_INFO_LEN    0x5
+#define FW_CEM_CMD_RESERVED           0X0
+#define FW_CEM_MAX_RETRIES            3
+#define FW_CEM_RESP_STATUS_SUCCESS    0x1
+
+/* Host Interface Command Structures */
+struct ixgbe_hic_hdr {
+	u8 cmd;
+	u8 buf_len;
+	union {
+		u8 cmd_resv;
+		u8 ret_status;
+	} cmd_or_resp;
+	u8 checksum;
+};
+
+struct ixgbe_hic_drv_info {
+	struct ixgbe_hic_hdr hdr;
+	u8 port_num;
+	u8 ver_sub;
+	u8 ver_build;
+	u8 ver_min;
+	u8 ver_maj;
+	u8 pad; /* end spacing to ensure length is mult. of dword */
+	u16 pad2; /* end spacing to ensure length is mult. of dword2 */
+};
+
 /* Transmit Descriptor - Advanced */
 union ixgbe_adv_tx_desc {
 	struct {
@@ -2663,6 +2705,9 @@ struct ixgbe_mac_operations {
 
 	/* Flow Control */
 	s32 (*fc_enable)(struct ixgbe_hw *, s32);
+
+	/* Manageability interface */
+	s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8);
 };
 
 struct ixgbe_phy_operations {
@@ -2832,6 +2877,7 @@ struct ixgbe_info {
 #define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE        -30
 #define IXGBE_ERR_PBA_SECTION                   -31
 #define IXGBE_ERR_INVALID_ARGUMENT              -32
+#define IXGBE_ERR_HOST_INTERFACE_COMMAND        -33
 #define IXGBE_NOT_IMPLEMENTED                   0x7FFFFFFF
 
 #endif /* _IXGBE_TYPE_H_ */
diff --git a/drivers/net/ixgbe/ixgbe_x540.c b/drivers/net/ixgbe/ixgbe_x540.c
index fa566ed..bec30ed 100644
--- a/drivers/net/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ixgbe/ixgbe_x540.c
@@ -894,6 +894,7 @@ static struct ixgbe_mac_operations mac_ops_X540 = {
 	.clear_vfta             = &ixgbe_clear_vfta_generic,
 	.set_vfta               = &ixgbe_set_vfta_generic,
 	.fc_enable              = &ixgbe_fc_enable_generic,
+	.set_fw_drv_ver         = &ixgbe_set_fw_drv_ver_generic,
 	.init_uta_tables        = &ixgbe_init_uta_tables_generic,
 	.setup_sfp              = NULL,
 	.set_mac_anti_spoofing  = &ixgbe_set_mac_anti_spoofing,
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 24/40] ixgbe: alloc DDP PCI pool and ixgbe queues as per NUMA nodes
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: Vasu Dev, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Vasu Dev <vasu.dev@intel.com>

Have DDP PCI pools allocated from closest NUMA node to the CPU
and have ixgbe queues allocs per NUMA node.

Signed-off-by: Vasu Dev <vasu.dev@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Tested-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_fcoe.c |    8 ++++++--
 drivers/net/ixgbe/ixgbe_main.c |   12 +++++++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c
index aadff4f..71e8458 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ixgbe/ixgbe_fcoe.c
@@ -615,7 +615,7 @@ static void ixgbe_fcoe_ddp_pools_free(struct ixgbe_fcoe *fcoe)
 static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
-	unsigned int cpu;
+	unsigned int cpu, nid;
 	struct pci_pool **pool;
 	char pool_name[32];
 
@@ -623,19 +623,23 @@ static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
 	if (!fcoe->pool)
 		return;
 
+	nid = dev_to_node(&adapter->pdev->dev);
 	/* allocate pci pool for each cpu */
 	for_each_possible_cpu(cpu) {
 		snprintf(pool_name, 32, "ixgbe_fcoe_ddp_%d", cpu);
 		pool = per_cpu_ptr(fcoe->pool, cpu);
+		set_dev_node(&adapter->pdev->dev, cpu_to_node(cpu));
 		*pool = pci_pool_create(pool_name,
 					adapter->pdev, IXGBE_FCPTR_MAX,
 					IXGBE_FCPTR_ALIGN, PAGE_SIZE);
 		if (!*pool) {
 			e_err(drv, "failed to alloc DDP pool on cpu:%d\n", cpu);
 			ixgbe_fcoe_ddp_pools_free(fcoe);
-			return;
+			break;
 		}
 	}
+	/* restore original node value */
+	set_dev_node(&adapter->pdev->dev, nid);
 }
 
 static void ixgbe_fcoe_locks_free(struct ixgbe_fcoe *fcoe)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 053c561..1c2e721 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4782,10 +4782,9 @@ static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
  **/
 static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
 {
-	int rx = 0, tx = 0, nid = adapter->node;
+	int rx = 0, tx = 0, nid;
 
-	if (nid < 0 || !node_online(nid))
-		nid = first_online_node;
+	nid = first_online_node;
 
 	for (; tx < adapter->num_tx_queues; tx++) {
 		struct ixgbe_ring *ring;
@@ -4802,8 +4801,12 @@ static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
 		ring->netdev = adapter->netdev;
 
 		adapter->tx_ring[tx] = ring;
+		nid = next_online_node(nid);
+		if (nid == MAX_NUMNODES)
+			nid = first_online_node;
 	}
 
+	nid = first_online_node;
 	for (; rx < adapter->num_rx_queues; rx++) {
 		struct ixgbe_ring *ring;
 
@@ -4819,6 +4822,9 @@ static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
 		ring->netdev = adapter->netdev;
 
 		adapter->rx_ring[rx] = ring;
+		nid = next_online_node(nid);
+		if (nid == MAX_NUMNODES)
+			nid = first_online_node;
 	}
 
 	ixgbe_cache_ring_register(adapter);
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 25/40] ixgbe: remove ntuple filtering
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Due to numerous issues in ntuple filters it has been decided to move the
interface over to the network flow classification interface.  As a first
step to achieving this I first need to remove the old ntuple interface.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |  136 -------------------------------------
 1 files changed, 0 insertions(+), 136 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index cb1555b..48a1bfc 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2335,141 +2335,6 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	return 0;
 }
 
-static int ixgbe_set_rx_ntuple(struct net_device *dev,
-                               struct ethtool_rx_ntuple *cmd)
-{
-	struct ixgbe_adapter *adapter = netdev_priv(dev);
-	struct ethtool_rx_ntuple_flow_spec *fs = &cmd->fs;
-	union ixgbe_atr_input input_struct;
-	struct ixgbe_atr_input_masks input_masks;
-	int target_queue;
-	int err;
-
-	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
-		return -EOPNOTSUPP;
-
-	/*
-	 * Don't allow programming if the action is a queue greater than
-	 * the number of online Tx queues.
-	 */
-	if ((fs->action >= adapter->num_tx_queues) ||
-	    (fs->action < ETHTOOL_RXNTUPLE_ACTION_DROP))
-		return -EINVAL;
-
-	memset(&input_struct, 0, sizeof(union ixgbe_atr_input));
-	memset(&input_masks, 0, sizeof(struct ixgbe_atr_input_masks));
-
-	/* record flow type */
-	switch (fs->flow_type) {
-	case IPV4_FLOW:
-		input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_IPV4;
-		break;
-	case TCP_V4_FLOW:
-		input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
-		break;
-	case UDP_V4_FLOW:
-		input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
-		break;
-	case SCTP_V4_FLOW:
-		input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
-		break;
-	default:
-		return -1;
-	}
-
-	/* copy vlan tag minus the CFI bit */
-	if ((fs->vlan_tag & 0xEFFF) || (~fs->vlan_tag_mask & 0xEFFF)) {
-		input_struct.formatted.vlan_id = htons(fs->vlan_tag & 0xEFFF);
-		if (!fs->vlan_tag_mask) {
-			input_masks.vlan_id_mask = htons(0xEFFF);
-		} else {
-			switch (~fs->vlan_tag_mask & 0xEFFF) {
-			/* all of these are valid vlan-mask values */
-			case 0xEFFF:
-			case 0xE000:
-			case 0x0FFF:
-			case 0x0000:
-				input_masks.vlan_id_mask =
-					htons(~fs->vlan_tag_mask);
-				break;
-			/* exit with error if vlan-mask is invalid */
-			default:
-				e_err(drv, "Partial VLAN ID or "
-				      "priority mask in vlan-mask is not "
-				      "supported by hardware\n");
-				return -1;
-			}
-		}
-	}
-
-	/* make sure we only use the first 2 bytes of user data */
-	if ((fs->data & 0xFFFF) || (~fs->data_mask & 0xFFFF)) {
-		input_struct.formatted.flex_bytes = htons(fs->data & 0xFFFF);
-		if (!(fs->data_mask & 0xFFFF)) {
-			input_masks.flex_mask = 0xFFFF;
-		} else if (~fs->data_mask & 0xFFFF) {
-			e_err(drv, "Partial user-def-mask is not "
-			      "supported by hardware\n");
-			return -1;
-		}
-	}
-
-	/*
-	 * Copy input into formatted structures
-	 *
-	 * These assignments are based on the following logic
-	 * If neither input or mask are set assume value is masked out.
-	 * If input is set, but mask is not mask should default to accept all.
-	 * If input is not set, but mask is set then mask likely results in 0.
-	 * If input is set and mask is set then assign both.
-	 */
-	if (fs->h_u.tcp_ip4_spec.ip4src || ~fs->m_u.tcp_ip4_spec.ip4src) {
-		input_struct.formatted.src_ip[0] = fs->h_u.tcp_ip4_spec.ip4src;
-		if (!fs->m_u.tcp_ip4_spec.ip4src)
-			input_masks.src_ip_mask[0] = 0xFFFFFFFF;
-		else
-			input_masks.src_ip_mask[0] =
-				~fs->m_u.tcp_ip4_spec.ip4src;
-	}
-	if (fs->h_u.tcp_ip4_spec.ip4dst || ~fs->m_u.tcp_ip4_spec.ip4dst) {
-		input_struct.formatted.dst_ip[0] = fs->h_u.tcp_ip4_spec.ip4dst;
-		if (!fs->m_u.tcp_ip4_spec.ip4dst)
-			input_masks.dst_ip_mask[0] = 0xFFFFFFFF;
-		else
-			input_masks.dst_ip_mask[0] =
-				~fs->m_u.tcp_ip4_spec.ip4dst;
-	}
-	if (fs->h_u.tcp_ip4_spec.psrc || ~fs->m_u.tcp_ip4_spec.psrc) {
-		input_struct.formatted.src_port = fs->h_u.tcp_ip4_spec.psrc;
-		if (!fs->m_u.tcp_ip4_spec.psrc)
-			input_masks.src_port_mask = 0xFFFF;
-		else
-			input_masks.src_port_mask = ~fs->m_u.tcp_ip4_spec.psrc;
-	}
-	if (fs->h_u.tcp_ip4_spec.pdst || ~fs->m_u.tcp_ip4_spec.pdst) {
-		input_struct.formatted.dst_port = fs->h_u.tcp_ip4_spec.pdst;
-		if (!fs->m_u.tcp_ip4_spec.pdst)
-			input_masks.dst_port_mask = 0xFFFF;
-		else
-			input_masks.dst_port_mask = ~fs->m_u.tcp_ip4_spec.pdst;
-	}
-
-	/* determine if we need to drop or route the packet */
-	if (fs->action == ETHTOOL_RXNTUPLE_ACTION_DROP)
-		target_queue = MAX_RX_QUEUES - 1;
-	else
-		target_queue = fs->action;
-
-	spin_lock(&adapter->fdir_perfect_lock);
-	err = ixgbe_fdir_add_perfect_filter_82599(&adapter->hw,
-						  &input_struct,
-						  &input_masks, 0,
-						  target_queue);
-	spin_unlock(&adapter->fdir_perfect_lock);
-
-	return err ? -1 : 0;
-}
-
 static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_settings           = ixgbe_get_settings,
 	.set_settings           = ixgbe_set_settings,
@@ -2505,7 +2370,6 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.set_coalesce           = ixgbe_set_coalesce,
 	.get_flags              = ethtool_op_get_flags,
 	.set_flags              = ixgbe_set_flags,
-	.set_rx_ntuple          = ixgbe_set_rx_ntuple,
 };
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 23/40] ixgbe: use per NUMA node lock for FCoE DDP
From: Jeff Kirsher @ 2011-06-07 12:32 UTC (permalink / raw)
  To: davem; +Cc: Vasu Dev, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Vasu Dev <vasu.dev@intel.com>

Adds per NUMA node lock to have CPU pass thru its NUMA lock
first before contending for global DDP fcoe->lock to setup
DDP lock, this is to reduce contentions across NUMA nodes.

Allocates and initialize per NUMA node lock in added
ixgbe_fcoe_lock_init and then have current CPU's numa_node_id
based NUMA node lock acquired before taking global fcoe->lock.

The node lock is allocated from its NUMA node using kzalloc_node.

Signed-off-by: Vasu Dev <vasu.dev@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_fcoe.c |   50 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/ixgbe/ixgbe_fcoe.h |    1 +
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c
index f5f39ed..aadff4f 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ixgbe/ixgbe_fcoe.c
@@ -109,6 +109,7 @@ int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid)
 	len = ddp->len;
 	/* if there an error, force to invalidate ddp context */
 	if (ddp->err) {
+		spin_lock(fcoe->node_lock[numa_node_id()]);
 		spin_lock_bh(&fcoe->lock);
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCFLT, 0);
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCFLTRW,
@@ -122,6 +123,7 @@ int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid)
 				(xid | IXGBE_FCDMARW_RE));
 		fcbuff = IXGBE_READ_REG(&adapter->hw, IXGBE_FCBUFF);
 		spin_unlock_bh(&fcoe->lock);
+		spin_unlock(fcoe->node_lock[numa_node_id()]);
 		if (fcbuff & IXGBE_FCBUFF_VALID)
 			udelay(100);
 	}
@@ -294,6 +296,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 
 	/* program DMA context */
 	hw = &adapter->hw;
+	spin_lock(fcoe->node_lock[numa_node_id()]);
 	spin_lock_bh(&fcoe->lock);
 
 	/* turn on last frame indication for target mode as FCP_RSPtarget is
@@ -315,6 +318,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	IXGBE_WRITE_REG(hw, IXGBE_FCFLTRW, fcfltrw);
 
 	spin_unlock_bh(&fcoe->lock);
+	spin_unlock(fcoe->node_lock[numa_node_id()]);
 
 	return 1;
 
@@ -634,6 +638,42 @@ static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
 	}
 }
 
+static void ixgbe_fcoe_locks_free(struct ixgbe_fcoe *fcoe)
+{
+	int node;
+
+	if (!fcoe->node_lock)
+		return;
+
+	for_each_node_with_cpus(node)
+			kfree(fcoe->node_lock[node]);
+
+	kfree(fcoe->node_lock);
+	fcoe->node_lock = NULL;
+}
+
+static void ixgbe_fcoe_lock_init(struct ixgbe_fcoe *fcoe)
+{
+	int node;
+	spinlock_t *node_lock;
+
+	fcoe->node_lock = kzalloc(sizeof(node_lock) * num_possible_nodes(),
+				  GFP_KERNEL);
+	if (!fcoe->node_lock)
+		return;
+
+	for_each_node_with_cpus(node) {
+		node_lock = kzalloc_node(sizeof(*node_lock) , GFP_KERNEL, node);
+		if (!node_lock) {
+			ixgbe_fcoe_locks_free(fcoe);
+			return;
+		}
+		spin_lock_init(node_lock);
+		fcoe->node_lock[node] = node_lock;
+	}
+	spin_lock_init(&fcoe->lock);
+}
+
 /**
  * ixgbe_configure_fcoe - configures registers for fcoe at start
  * @adapter: ptr to ixgbe adapter
@@ -654,12 +694,16 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 #endif
 
 	if (!fcoe->pool) {
-		spin_lock_init(&fcoe->lock);
+		ixgbe_fcoe_lock_init(fcoe);
+		if (!fcoe->node_lock) {
+			e_err(drv, "failed to initialize fcoe locks\n");
+			return;
+		}
 
 		ixgbe_fcoe_ddp_pools_alloc(adapter);
 		if (!fcoe->pool) {
 			e_err(drv, "failed to alloc percpu fcoe DDP pools\n");
-			return;
+			goto out_locks_free;
 		}
 
 		/* Extra buffer to be shared by all DDPs for HW work around */
@@ -735,6 +779,8 @@ out_extra_ddp_buffer:
 	kfree(fcoe->extra_ddp_buffer);
 out_ddp_pools:
 	ixgbe_fcoe_ddp_pools_free(fcoe);
+out_locks_free:
+	ixgbe_fcoe_locks_free(fcoe);
 }
 
 /**
diff --git a/drivers/net/ixgbe/ixgbe_fcoe.h b/drivers/net/ixgbe/ixgbe_fcoe.h
index d876e7a..8618892 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ixgbe/ixgbe_fcoe.h
@@ -69,6 +69,7 @@ struct ixgbe_fcoe {
 	struct pci_pool **pool;
 	atomic_t refcnt;
 	spinlock_t lock;
+	struct spinlock **node_lock;
 	struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
 	unsigned char *extra_ddp_buffer;
 	dma_addr_t extra_ddp_buffer_dma;
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 28/40] ixgbe: add basic support for setting and getting nfc controls
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change adds basic support for the obtaining of RSS ring counts.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |   19 +++++++++++++++++++
 1 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 0a9cd2b..477cbd3 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2335,6 +2335,24 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	return 0;
 }
 
+static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			   void *rule_locs)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_rx_queues;
+		ret = 0;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_settings           = ixgbe_get_settings,
 	.set_settings           = ixgbe_set_settings,
@@ -2370,6 +2388,7 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.set_coalesce           = ixgbe_set_coalesce,
 	.get_flags              = ethtool_op_get_flags,
 	.set_flags              = ixgbe_set_flags,
+	.get_rxnfc		= ixgbe_get_rxnfc,
 };
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 27/40] ixgbe: update perfect filter framework to support retaining filters
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change is meant to update the internal framework of ixgbe so that
perfect filters can be stored and tracked via software.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe.h       |   18 +-
 drivers/net/ixgbe/ixgbe_82599.c |  603 ++++++++++++++++++++++-----------------
 drivers/net/ixgbe/ixgbe_main.c  |    2 +-
 drivers/net/ixgbe/ixgbe_type.h  |   25 +-
 4 files changed, 368 insertions(+), 280 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index d5674fc..5ea5b4c 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -543,16 +543,22 @@ extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16);
 extern void ixgbe_write_eitr(struct ixgbe_q_vector *);
 extern int ethtool_ioctl(struct ifreq *ifr);
 extern s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
-extern s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc);
-extern s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc);
+extern s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+extern s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl);
 extern s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
 						 union ixgbe_atr_hash_dword input,
 						 union ixgbe_atr_hash_dword common,
                                                  u8 queue);
-extern s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
-                                      union ixgbe_atr_input *input,
-                                      struct ixgbe_atr_input_masks *input_masks,
-                                      u16 soft_id, u8 queue);
+extern s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+					   union ixgbe_atr_input *input_mask);
+extern s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+						 union ixgbe_atr_input *input,
+						 u16 soft_id, u8 queue);
+extern s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+						 union ixgbe_atr_input *input,
+						 u16 soft_id);
+extern void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+						 union ixgbe_atr_input *mask);
 extern void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
                                    struct ixgbe_ring *ring);
 extern void ixgbe_clear_rscctl(struct ixgbe_adapter *adapter,
diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index 4a6826b..1880d9a 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -1107,115 +1107,87 @@ s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
 }
 
 /**
- *  ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters
+ *  ixgbe_set_fdir_rxpba_82599 - Initialize Flow Director Rx packet buffer
  *  @hw: pointer to hardware structure
  *  @pballoc: which mode to allocate filters with
  **/
-s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc)
+static s32 ixgbe_set_fdir_rxpba_82599(struct ixgbe_hw *hw, u32 pballoc)
 {
-	u32 fdirctrl = 0;
+	u32 fdir_pbsize = hw->mac.rx_pb_size << IXGBE_RXPBSIZE_SHIFT;
+	u32 current_rxpbsize = 0;
 	int i;
 
-	/* Send interrupt when 64 filters are left */
-	fdirctrl |= 4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT;
-
-	/* Set the maximum length per hash bucket to 0xA filters */
-	fdirctrl |= 0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT;
-
+	/* reserve space for Flow Director filters */
 	switch (pballoc) {
-	case IXGBE_FDIR_PBALLOC_64K:
-		/* 8k - 1 signature filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_64K;
+	case IXGBE_FDIR_PBALLOC_256K:
+		fdir_pbsize -= 256 << IXGBE_RXPBSIZE_SHIFT;
 		break;
 	case IXGBE_FDIR_PBALLOC_128K:
-		/* 16k - 1 signature filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_128K;
+		fdir_pbsize -= 128 << IXGBE_RXPBSIZE_SHIFT;
 		break;
-	case IXGBE_FDIR_PBALLOC_256K:
-		/* 32k - 1 signature filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_256K;
+	case IXGBE_FDIR_PBALLOC_64K:
+		fdir_pbsize -= 64 << IXGBE_RXPBSIZE_SHIFT;
 		break;
+	case IXGBE_FDIR_PBALLOC_NONE:
 	default:
-		/* bad value */
-		return IXGBE_ERR_CONFIG;
+		return IXGBE_ERR_PARAM;
 	}
 
-	/* Move the flexible bytes to use the ethertype - shift 6 words */
-	fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT);
+	/* determine current RX packet buffer size */
+	for (i = 0; i < 8; i++)
+		current_rxpbsize += IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
 
+	/* if there is already room for the filters do nothing */
+	if (current_rxpbsize <= fdir_pbsize)
+		return 0;
 
-	/* Prime the keys for hashing */
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
-
-	/*
-	 * Poll init-done after we write the register.  Estimated times:
-	 *      10G: PBALLOC = 11b, timing is 60us
-	 *       1G: PBALLOC = 11b, timing is 600us
-	 *     100M: PBALLOC = 11b, timing is 6ms
-	 *
-	 *     Multiple these timings by 4 if under full Rx load
-	 *
-	 * So we'll poll for IXGBE_FDIR_INIT_DONE_POLL times, sleeping for
-	 * 1 msec per poll time.  If we're at line rate and drop to 100M, then
-	 * this might not finish in our poll time, but we can live with that
-	 * for now.
-	 */
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
-	IXGBE_WRITE_FLUSH(hw);
-	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
-		if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
-		                   IXGBE_FDIRCTRL_INIT_DONE)
-			break;
-		usleep_range(1000, 2000);
+	if (current_rxpbsize > hw->mac.rx_pb_size) {
+		/*
+		 * if rxpbsize is greater than max then HW max the Rx buffer
+		 * sizes are unconfigured or misconfigured since HW default is
+		 * to give the full buffer to each traffic class resulting in
+		 * the total size being buffer size 8x actual size
+		 *
+		 * This assumes no DCB since the RXPBSIZE registers appear to
+		 * be unconfigured.
+		 */
+		IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), fdir_pbsize);
+		for (i = 1; i < 8; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
+	} else {
+		/*
+		 * Since the Rx packet buffer appears to have already been
+		 * configured we need to shrink each packet buffer by enough
+		 * to make room for the filters.  As such we take each rxpbsize
+		 * value and multiply it by a fraction representing the size
+		 * needed over the size we currently have.
+		 *
+		 * We need to reduce fdir_pbsize and current_rxpbsize to
+		 * 1/1024 of their original values in order to avoid
+		 * overflowing the u32 being used to store rxpbsize.
+		 */
+		fdir_pbsize >>= IXGBE_RXPBSIZE_SHIFT;
+		current_rxpbsize >>= IXGBE_RXPBSIZE_SHIFT;
+		for (i = 0; i < 8; i++) {
+			u32 rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
+			rxpbsize *= fdir_pbsize;
+			rxpbsize /= current_rxpbsize;
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
+		}
 	}
-	if (i >= IXGBE_FDIR_INIT_DONE_POLL)
-		hw_dbg(hw, "Flow Director Signature poll time exceeded!\n");
 
 	return 0;
 }
 
 /**
- *  ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters
+ *  ixgbe_fdir_enable_82599 - Initialize Flow Director perfect filters
  *  @hw: pointer to hardware structure
- *  @pballoc: which mode to allocate filters with
+ *  @fdirctrl: value to write to flow director control register
  **/
-s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
+static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl)
 {
-	u32 fdirctrl = 0;
 	int i;
 
-	/* Send interrupt when 64 filters are left */
-	fdirctrl |= 4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT;
-
-	/* Initialize the drop queue to Rx queue 127 */
-	fdirctrl |= (127 << IXGBE_FDIRCTRL_DROP_Q_SHIFT);
-
-	switch (pballoc) {
-	case IXGBE_FDIR_PBALLOC_64K:
-		/* 2k - 1 perfect filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_64K;
-		break;
-	case IXGBE_FDIR_PBALLOC_128K:
-		/* 4k - 1 perfect filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_128K;
-		break;
-	case IXGBE_FDIR_PBALLOC_256K:
-		/* 8k - 1 perfect filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_256K;
-		break;
-	default:
-		/* bad value */
-		return IXGBE_ERR_CONFIG;
-	}
-
-	/* Turn perfect match filtering on */
-	fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH;
-	fdirctrl |= IXGBE_FDIRCTRL_REPORT_STATUS;
-
-	/* Move the flexible bytes to use the ethertype - shift 6 words */
-	fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT);
-
 	/* Prime the keys for hashing */
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
@@ -1233,10 +1205,6 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
 	 * this might not finish in our poll time, but we can live with that
 	 * for now.
 	 */
-
-	/* Set the maximum length per hash bucket to 0xA filters */
-	fdirctrl |= (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT);
-
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
 	IXGBE_WRITE_FLUSH(hw);
 	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
@@ -1245,101 +1213,77 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
 			break;
 		usleep_range(1000, 2000);
 	}
-	if (i >= IXGBE_FDIR_INIT_DONE_POLL)
-		hw_dbg(hw, "Flow Director Perfect poll time exceeded!\n");
 
-	return 0;
+	if (i >= IXGBE_FDIR_INIT_DONE_POLL)
+		hw_dbg(hw, "Flow Director poll time exceeded!\n");
 }
 
-
 /**
- *  ixgbe_atr_compute_hash_82599 - Compute the hashes for SW ATR
- *  @stream: input bitstream to compute the hash on
- *  @key: 32-bit hash key
+ *  ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register, initially
+ *             contains just the value of the Rx packet buffer allocation
  **/
-static u32 ixgbe_atr_compute_hash_82599(union ixgbe_atr_input *atr_input,
-					u32 key)
+s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl)
 {
-	/*
-	 * The algorithm is as follows:
-	 *    Hash[15:0] = Sum { S[n] x K[n+16] }, n = 0...350
-	 *    where Sum {A[n]}, n = 0...n is bitwise XOR of A[0], A[1]...A[n]
-	 *    and A[n] x B[n] is bitwise AND between same length strings
-	 *
-	 *    K[n] is 16 bits, defined as:
-	 *       for n modulo 32 >= 15, K[n] = K[n % 32 : (n % 32) - 15]
-	 *       for n modulo 32 < 15, K[n] =
-	 *             K[(n % 32:0) | (31:31 - (14 - (n % 32)))]
-	 *
-	 *    S[n] is 16 bits, defined as:
-	 *       for n >= 15, S[n] = S[n:n - 15]
-	 *       for n < 15, S[n] = S[(n:0) | (350:350 - (14 - n))]
-	 *
-	 *    To simplify for programming, the algorithm is implemented
-	 *    in software this way:
-	 *
-	 *    key[31:0], hi_hash_dword[31:0], lo_hash_dword[31:0], hash[15:0]
-	 *
-	 *    for (i = 0; i < 352; i+=32)
-	 *        hi_hash_dword[31:0] ^= Stream[(i+31):i];
-	 *
-	 *    lo_hash_dword[15:0]  ^= Stream[15:0];
-	 *    lo_hash_dword[15:0]  ^= hi_hash_dword[31:16];
-	 *    lo_hash_dword[31:16] ^= hi_hash_dword[15:0];
-	 *
-	 *    hi_hash_dword[31:0]  ^= Stream[351:320];
-	 *
-	 *    if(key[0])
-	 *        hash[15:0] ^= Stream[15:0];
-	 *
-	 *    for (i = 0; i < 16; i++) {
-	 *        if (key[i])
-	 *            hash[15:0] ^= lo_hash_dword[(i+15):i];
-	 *        if (key[i + 16])
-	 *            hash[15:0] ^= hi_hash_dword[(i+15):i];
-	 *    }
-	 *
-	 */
-	__be32 common_hash_dword = 0;
-	u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
-	u32 hash_result = 0;
-	u8 i;
+	s32 err;
 
-	/* record the flow_vm_vlan bits as they are a key part to the hash */
-	flow_vm_vlan = ntohl(atr_input->dword_stream[0]);
+	/* Before enabling Flow Director, verify the Rx Packet Buffer size */
+	err = ixgbe_set_fdir_rxpba_82599(hw, fdirctrl);
+	if (err)
+		return err;
 
-	/* generate common hash dword */
-	for (i = 10; i; i -= 2)
-		common_hash_dword ^= atr_input->dword_stream[i] ^
-				     atr_input->dword_stream[i - 1];
+	/*
+	 * Continue setup of fdirctrl register bits:
+	 *  Move the flexible bytes to use the ethertype - shift 6 words
+	 *  Set the maximum length per hash bucket to 0xA filters
+	 *  Send interrupt when 64 filters are left
+	 */
+	fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+		    (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+		    (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
 
-	hi_hash_dword = ntohl(common_hash_dword);
+	/* write hashes and fdirctrl register, poll for completion */
+	ixgbe_fdir_enable_82599(hw, fdirctrl);
 
-	/* low dword is word swapped version of common */
-	lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+	return 0;
+}
 
-	/* apply flow ID/VM pool/VLAN ID bits to hash words */
-	hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+/**
+ *  ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register, initially
+ *             contains just the value of the Rx packet buffer allocation
+ **/
+s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+	s32 err;
 
-	/* Process bits 0 and 16 */
-	if (key & 0x0001) hash_result ^= lo_hash_dword;
-	if (key & 0x00010000) hash_result ^= hi_hash_dword;
+	/* Before enabling Flow Director, verify the Rx Packet Buffer size */
+	err = ixgbe_set_fdir_rxpba_82599(hw, fdirctrl);
+	if (err)
+		return err;
 
 	/*
-	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
-	 * delay this because bit 0 of the stream should not be processed
-	 * so we do not add the vlan until after bit 0 was processed
+	 * Continue setup of fdirctrl register bits:
+	 *  Turn perfect match filtering on
+	 *  Report hash in RSS field of Rx wb descriptor
+	 *  Initialize the drop queue
+	 *  Move the flexible bytes to use the ethertype - shift 6 words
+	 *  Set the maximum length per hash bucket to 0xA filters
+	 *  Send interrupt when 64 (0x4 * 16) filters are left
 	 */
-	lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+	fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH |
+		    IXGBE_FDIRCTRL_REPORT_STATUS |
+		    (IXGBE_FDIR_DROP_QUEUE << IXGBE_FDIRCTRL_DROP_Q_SHIFT) |
+		    (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+		    (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+		    (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
 
+	/* write hashes and fdirctrl register, poll for completion */
+	ixgbe_fdir_enable_82599(hw, fdirctrl);
 
-	/* process the remaining 30 bits in the key 2 bits at a time */
-	for (i = 15; i; i-- ) {
-		if (key & (0x0001 << i)) hash_result ^= lo_hash_dword >> i;
-		if (key & (0x00010000 << i)) hash_result ^= hi_hash_dword >> i;
-	}
-
-	return hash_result & IXGBE_ATR_HASH_MASK;
+	return 0;
 }
 
 /*
@@ -1476,7 +1420,6 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
 	 */
 	fdirhashcmd = (u64)fdircmd << 32;
 	fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common);
-
 	IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd);
 
 	hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd);
@@ -1484,6 +1427,101 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
 	return 0;
 }
 
+#define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \
+do { \
+	u32 n = (_n); \
+	if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
+		bucket_hash ^= lo_hash_dword >> n; \
+	if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
+		bucket_hash ^= hi_hash_dword >> n; \
+} while (0);
+
+/**
+ *  ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash
+ *  @atr_input: input bitstream to compute the hash on
+ *  @input_mask: mask for the input bitstream
+ *
+ *  This function serves two main purposes.  First it applys the input_mask
+ *  to the atr_input resulting in a cleaned up atr_input data stream.
+ *  Secondly it computes the hash and stores it in the bkt_hash field at
+ *  the end of the input byte stream.  This way it will be available for
+ *  future use without needing to recompute the hash.
+ **/
+void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+					  union ixgbe_atr_input *input_mask)
+{
+
+	u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+	u32 bucket_hash = 0;
+
+	/* Apply masks to input data */
+	input->dword_stream[0]  &= input_mask->dword_stream[0];
+	input->dword_stream[1]  &= input_mask->dword_stream[1];
+	input->dword_stream[2]  &= input_mask->dword_stream[2];
+	input->dword_stream[3]  &= input_mask->dword_stream[3];
+	input->dword_stream[4]  &= input_mask->dword_stream[4];
+	input->dword_stream[5]  &= input_mask->dword_stream[5];
+	input->dword_stream[6]  &= input_mask->dword_stream[6];
+	input->dword_stream[7]  &= input_mask->dword_stream[7];
+	input->dword_stream[8]  &= input_mask->dword_stream[8];
+	input->dword_stream[9]  &= input_mask->dword_stream[9];
+	input->dword_stream[10] &= input_mask->dword_stream[10];
+
+	/* record the flow_vm_vlan bits as they are a key part to the hash */
+	flow_vm_vlan = ntohl(input->dword_stream[0]);
+
+	/* generate common hash dword */
+	hi_hash_dword = ntohl(input->dword_stream[1] ^
+				    input->dword_stream[2] ^
+				    input->dword_stream[3] ^
+				    input->dword_stream[4] ^
+				    input->dword_stream[5] ^
+				    input->dword_stream[6] ^
+				    input->dword_stream[7] ^
+				    input->dword_stream[8] ^
+				    input->dword_stream[9] ^
+				    input->dword_stream[10]);
+
+	/* low dword is word swapped version of common */
+	lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+
+	/* apply flow ID/VM pool/VLAN ID bits to hash words */
+	hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+
+	/* Process bits 0 and 16 */
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(0);
+
+	/*
+	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+	 * delay this because bit 0 of the stream should not be processed
+	 * so we do not add the vlan until after bit 0 was processed
+	 */
+	lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+
+	/* Process remaining 30 bit of the key */
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(1);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(2);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(3);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(4);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(5);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(6);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(7);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(8);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(9);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(10);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(11);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(12);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(13);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(14);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(15);
+
+	/*
+	 * Limit hash to 13 bits since max bucket count is 8K.
+	 * Store result at the end of the input stream.
+	 */
+	input->formatted.bkt_hash = bucket_hash & 0x1FFF;
+}
+
 /**
  *  ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks
  *  @input_mask: mask to be bit swapped
@@ -1493,11 +1531,11 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
  *  generate a correctly swapped value we need to bit swap the mask and that
  *  is what is accomplished by this function.
  **/
-static u32 ixgbe_get_fdirtcpm_82599(struct ixgbe_atr_input_masks *input_masks)
+static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask)
 {
-	u32 mask = ntohs(input_masks->dst_port_mask);
+	u32 mask = ntohs(input_mask->formatted.dst_port);
 	mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT;
-	mask |= ntohs(input_masks->src_port_mask);
+	mask |= ntohs(input_mask->formatted.src_port);
 	mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1);
 	mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2);
 	mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4);
@@ -1519,52 +1557,14 @@ static u32 ixgbe_get_fdirtcpm_82599(struct ixgbe_atr_input_masks *input_masks)
 	IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(ntohl(value)))
 
 #define IXGBE_STORE_AS_BE16(_value) \
-	(((u16)(_value) >> 8) | ((u16)(_value) << 8))
+	ntohs(((u16)(_value) >> 8) | ((u16)(_value) << 8))
 
-/**
- *  ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter
- *  @hw: pointer to hardware structure
- *  @input: input bitstream
- *  @input_masks: bitwise masks for relevant fields
- *  @soft_id: software index into the silicon hash tables for filter storage
- *  @queue: queue index to direct traffic to
- *
- *  Note that the caller to this function must lock before calling, since the
- *  hardware writes must be protected from one another.
- **/
-s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
-                                      union ixgbe_atr_input *input,
-                                      struct ixgbe_atr_input_masks *input_masks,
-                                      u16 soft_id, u8 queue)
+s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+				    union ixgbe_atr_input *input_mask)
 {
-	u32 fdirhash;
-	u32 fdircmd;
-	u32 fdirport, fdirtcpm;
-	u32 fdirvlan;
-	/* start with VLAN, flex bytes, VM pool, and IPv6 destination masked */
-	u32 fdirm = IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP | IXGBE_FDIRM_FLEX |
-		    IXGBE_FDIRM_POOL | IXGBE_FDIRM_DIPv6;
-
-	/*
-	 * Check flow_type formatting, and bail out before we touch the hardware
-	 * if there's a configuration issue
-	 */
-	switch (input->formatted.flow_type) {
-	case IXGBE_ATR_FLOW_TYPE_IPV4:
-		/* use the L4 protocol mask for raw IPv4/IPv6 traffic */
-		fdirm |= IXGBE_FDIRM_L4P;
-	case IXGBE_ATR_FLOW_TYPE_SCTPV4:
-		if (input_masks->dst_port_mask || input_masks->src_port_mask) {
-			hw_dbg(hw, " Error on src/dst port mask\n");
-			return IXGBE_ERR_CONFIG;
-		}
-	case IXGBE_ATR_FLOW_TYPE_TCPV4:
-	case IXGBE_ATR_FLOW_TYPE_UDPV4:
-		break;
-	default:
-		hw_dbg(hw, " Error on flow type input\n");
-		return IXGBE_ERR_CONFIG;
-	}
+	/* mask IPv6 since it is currently not supported */
+	u32 fdirm = IXGBE_FDIRM_DIPv6;
+	u32 fdirtcpm;
 
 	/*
 	 * Program the relevant mask registers.  If src/dst_port or src/dst_addr
@@ -1576,41 +1576,71 @@ s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
 	 * point in time.
 	 */
 
-	/* Program FDIRM */
-	switch (ntohs(input_masks->vlan_id_mask) & 0xEFFF) {
-	case 0xEFFF:
-		/* Unmask VLAN ID - bit 0 and fall through to unmask prio */
-		fdirm &= ~IXGBE_FDIRM_VLANID;
-	case 0xE000:
-		/* Unmask VLAN prio - bit 1 */
-		fdirm &= ~IXGBE_FDIRM_VLANP;
+	/* verify bucket hash is cleared on hash generation */
+	if (input_mask->formatted.bkt_hash)
+		hw_dbg(hw, " bucket hash should always be 0 in mask\n");
+
+	/* Program FDIRM and verify partial masks */
+	switch (input_mask->formatted.vm_pool & 0x7F) {
+	case 0x0:
+		fdirm |= IXGBE_FDIRM_POOL;
+	case 0x7F:
 		break;
-	case 0x0FFF:
-		/* Unmask VLAN ID - bit 0 */
-		fdirm &= ~IXGBE_FDIRM_VLANID;
+	default:
+		hw_dbg(hw, " Error on vm pool mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) {
+	case 0x0:
+		fdirm |= IXGBE_FDIRM_L4P;
+		if (input_mask->formatted.dst_port ||
+		    input_mask->formatted.src_port) {
+			hw_dbg(hw, " Error on src/dst port mask\n");
+			return IXGBE_ERR_CONFIG;
+		}
+	case IXGBE_ATR_L4TYPE_MASK:
 		break;
+	default:
+		hw_dbg(hw, " Error on flow type mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (ntohs(input_mask->formatted.vlan_id) & 0xEFFF) {
 	case 0x0000:
-		/* do nothing, vlans already masked */
+		/* mask VLAN ID, fall through to mask VLAN priority */
+		fdirm |= IXGBE_FDIRM_VLANID;
+	case 0x0FFF:
+		/* mask VLAN priority */
+		fdirm |= IXGBE_FDIRM_VLANP;
+		break;
+	case 0xE000:
+		/* mask VLAN ID only, fall through */
+		fdirm |= IXGBE_FDIRM_VLANID;
+	case 0xEFFF:
+		/* no VLAN fields masked */
 		break;
 	default:
 		hw_dbg(hw, " Error on VLAN mask\n");
 		return IXGBE_ERR_CONFIG;
 	}
 
-	if (input_masks->flex_mask & 0xFFFF) {
-		if ((input_masks->flex_mask & 0xFFFF) != 0xFFFF) {
-			hw_dbg(hw, " Error on flexible byte mask\n");
-			return IXGBE_ERR_CONFIG;
-		}
-		/* Unmask Flex Bytes - bit 4 */
-		fdirm &= ~IXGBE_FDIRM_FLEX;
+	switch (input_mask->formatted.flex_bytes & 0xFFFF) {
+	case 0x0000:
+		/* Mask Flex Bytes, fall through */
+		fdirm |= IXGBE_FDIRM_FLEX;
+	case 0xFFFF:
+		break;
+	default:
+		hw_dbg(hw, " Error on flexible byte mask\n");
+		return IXGBE_ERR_CONFIG;
 	}
 
 	/* Now mask VM pool and destination IPv6 - bits 5 and 2 */
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
 
 	/* store the TCP/UDP port masks, bit reversed from port layout */
-	fdirtcpm = ixgbe_get_fdirtcpm_82599(input_masks);
+	fdirtcpm = ixgbe_get_fdirtcpm_82599(input_mask);
 
 	/* write both the same so that UDP and TCP use the same mask */
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm);
@@ -1618,24 +1648,32 @@ s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
 
 	/* store source and destination IP masks (big-enian) */
 	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M,
-			     ~input_masks->src_ip_mask[0]);
+			     ~input_mask->formatted.src_ip[0]);
 	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M,
-			     ~input_masks->dst_ip_mask[0]);
+			     ~input_mask->formatted.dst_ip[0]);
 
-	/* Apply masks to input data */
-	input->formatted.vlan_id &= input_masks->vlan_id_mask;
-	input->formatted.flex_bytes &= input_masks->flex_mask;
-	input->formatted.src_port &= input_masks->src_port_mask;
-	input->formatted.dst_port &= input_masks->dst_port_mask;
-	input->formatted.src_ip[0] &= input_masks->src_ip_mask[0];
-	input->formatted.dst_ip[0] &= input_masks->dst_ip_mask[0];
+	return 0;
+}
 
-	/* record vlan (little-endian) and flex_bytes(big-endian) */
-	fdirvlan =
-		IXGBE_STORE_AS_BE16(ntohs(input->formatted.flex_bytes));
-	fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
-	fdirvlan |= ntohs(input->formatted.vlan_id);
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
+s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id, u8 queue)
+{
+	u32 fdirport, fdirvlan, fdirhash, fdircmd;
+
+	/* currently IPv6 is not supported, must be programmed with 0 */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0),
+			     input->formatted.src_ip[0]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(1),
+			     input->formatted.src_ip[1]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(2),
+			     input->formatted.src_ip[2]);
+
+	/* record the source address (big-endian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]);
+
+	/* record the first 32 bits of the destination address (big-endian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
 
 	/* record source and destination port (little-endian)*/
 	fdirport = ntohs(input->formatted.dst_port);
@@ -1643,29 +1681,80 @@ s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
 	fdirport |= ntohs(input->formatted.src_port);
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport);
 
-	/* record the first 32 bits of the destination address (big-endian) */
-	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
+	/* record vlan (little-endian) and flex_bytes(big-endian) */
+	fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes);
+	fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
+	fdirvlan |= ntohs(input->formatted.vlan_id);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
 
-	/* record the source address (big-endian) */
-	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]);
+	/* configure FDIRHASH register */
+	fdirhash = input->formatted.bkt_hash;
+	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+	/*
+	 * flush all previous writes to make certain registers are
+	 * programmed prior to issuing the command
+	 */
+	IXGBE_WRITE_FLUSH(hw);
 
 	/* configure FDIRCMD register */
 	fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
 		  IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+	if (queue == IXGBE_FDIR_DROP_QUEUE)
+		fdircmd |= IXGBE_FDIRCMD_DROP;
 	fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
 	fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+	fdircmd |= (u32)input->formatted.vm_pool << IXGBE_FDIRCMD_VT_POOL_SHIFT;
 
-	/* we only want the bucket hash so drop the upper 16 bits */
-	fdirhash = ixgbe_atr_compute_hash_82599(input,
-						IXGBE_ATR_BUCKET_HASH_KEY);
-	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
-
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd);
 
 	return 0;
 }
 
+s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id)
+{
+	u32 fdirhash;
+	u32 fdircmd = 0;
+	u32 retry_count;
+	s32 err = 0;
+
+	/* configure FDIRHASH register */
+	fdirhash = input->formatted.bkt_hash;
+	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+	/* flush hash to HW */
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Query if filter is present */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, IXGBE_FDIRCMD_CMD_QUERY_REM_FILT);
+
+	for (retry_count = 10; retry_count; retry_count--) {
+		/* allow 10us for query to process */
+		udelay(10);
+		/* verify query completed successfully */
+		fdircmd = IXGBE_READ_REG(hw, IXGBE_FDIRCMD);
+		if (!(fdircmd & IXGBE_FDIRCMD_CMD_MASK))
+			break;
+	}
+
+	if (!retry_count)
+		err = IXGBE_ERR_FDIR_REINIT_FAILED;
+
+	/* if filter exists in hardware then remove it */
+	if (fdircmd & IXGBE_FDIRCMD_FILTER_VALID) {
+		IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+		IXGBE_WRITE_FLUSH(hw);
+		IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+				IXGBE_FDIRCMD_CMD_REMOVE_FLOW);
+	}
+
+	return err;
+}
+
 /**
  *  ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register
  *  @hw: pointer to hardware structure
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 7c929a2..bfeed84 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -5135,7 +5135,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 		adapter->atr_sample_rate = 20;
 		adapter->ring_feature[RING_F_FDIR].indices =
 							 IXGBE_MAX_FDIR_INDICES;
-		adapter->fdir_pballoc = 0;
+		adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_64K;
 #ifdef IXGBE_FCOE
 		adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE;
 		adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index 9a499a6..8b1abd4 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -2056,9 +2056,10 @@ enum {
 #define IXGBE_VFLREC(_i)                 (0x00700 + (_i * 4))
 
 enum ixgbe_fdir_pballoc_type {
-	IXGBE_FDIR_PBALLOC_64K = 0,
-	IXGBE_FDIR_PBALLOC_128K,
-	IXGBE_FDIR_PBALLOC_256K,
+	IXGBE_FDIR_PBALLOC_NONE = 0,
+	IXGBE_FDIR_PBALLOC_64K  = 1,
+	IXGBE_FDIR_PBALLOC_128K = 2,
+	IXGBE_FDIR_PBALLOC_256K = 3,
 };
 #define IXGBE_FDIR_PBALLOC_SIZE_SHIFT           16
 
@@ -2112,7 +2113,7 @@ enum ixgbe_fdir_pballoc_type {
 #define IXGBE_FDIRCMD_CMD_ADD_FLOW              0x00000001
 #define IXGBE_FDIRCMD_CMD_REMOVE_FLOW           0x00000002
 #define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT        0x00000003
-#define IXGBE_FDIRCMD_CMD_QUERY_REM_HASH        0x00000007
+#define IXGBE_FDIRCMD_FILTER_VALID              0x00000004
 #define IXGBE_FDIRCMD_FILTER_UPDATE             0x00000008
 #define IXGBE_FDIRCMD_IPv6DMATCH                0x00000010
 #define IXGBE_FDIRCMD_L4TYPE_UDP                0x00000020
@@ -2131,6 +2132,8 @@ enum ixgbe_fdir_pballoc_type {
 #define IXGBE_FDIR_INIT_DONE_POLL               10
 #define IXGBE_FDIRCMD_CMD_POLL                  10
 
+#define IXGBE_FDIR_DROP_QUEUE                   127
+
 /* Manageablility Host Interface defines */
 #define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH       1792 /* Num of bytes in range */
 #define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH      448 /* Num of dwords in range */
@@ -2350,7 +2353,7 @@ union ixgbe_atr_input {
 	 * src_port   - 2 bytes
 	 * dst_port   - 2 bytes
 	 * flex_bytes - 2 bytes
-	 * rsvd0      - 2 bytes - space reserved must be 0.
+	 * bkt_hash   - 2 bytes
 	 */
 	struct {
 		u8     vm_pool;
@@ -2361,7 +2364,7 @@ union ixgbe_atr_input {
 		__be16 src_port;
 		__be16 dst_port;
 		__be16 flex_bytes;
-		__be16 rsvd0;
+		__be16 bkt_hash;
 	} formatted;
 	__be32 dword_stream[11];
 };
@@ -2382,16 +2385,6 @@ union ixgbe_atr_hash_dword {
 	__be32 dword;
 };
 
-struct ixgbe_atr_input_masks {
-	__be16 rsvd0;
-	__be16 vlan_id_mask;
-	__be32 dst_ip_mask[4];
-	__be32 src_ip_mask[4];
-	__be16 src_port_mask;
-	__be16 dst_port_mask;
-	__be16 flex_mask;
-};
-
 enum ixgbe_eeprom_type {
 	ixgbe_eeprom_uninitialized = 0,
 	ixgbe_eeprom_spi,
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 26/40] ixgbe: fix flags relating to perfect filters to support coexistence
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

I am removing the requirement that Ntuple filters have the same
number of queues and requirements as ATR.  As a result this change will
make it so that all the Ntuple flag does is disable ATR for now.

This change fixes an issue in which we were incorrectly re-enabling ATR
when we exited perfect filter mode.  This was due to the fact that the
logic assumed RSS and DCB were mutually exclusive which is no longer the
case.

To correct this we just need to add a check to guarantee DCB is disabled
before re-enabling ATR.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_dcb_nl.c  |   13 ++++++-------
 drivers/net/ixgbe/ixgbe_ethtool.c |   24 ++++++++++++------------
 drivers/net/ixgbe/ixgbe_main.c    |   34 ++++++++++++----------------------
 3 files changed, 30 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index 08c7aeb..bd2d752 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -114,11 +114,12 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 	u8 err = 0;
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
+	/* verify there is something to do, if not then exit */
+	if (!!state != !(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+		return err;
+
 	if (state > 0) {
 		/* Turn on DCB */
-		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
-			goto out;
-
 		if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
 			e_err(drv, "Enable failed, needs MSI-X\n");
 			err = 1;
@@ -143,9 +144,6 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 		ixgbe_setup_tc(netdev, MAX_TRAFFIC_CLASS);
 	} else {
 		/* Turn off DCB */
-		if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
-			goto out;
-
 		adapter->hw.fc.requested_mode = adapter->last_lfc_mode;
 		adapter->temp_dcb_cfg.pfc_mode_enable = false;
 		adapter->dcb_cfg.pfc_mode_enable = false;
@@ -153,7 +151,8 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 		switch (adapter->hw.mac.type) {
 		case ixgbe_mac_82599EB:
 		case ixgbe_mac_X540:
-			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
+			if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+				adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
 			break;
 		default:
 			break;
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 48a1bfc..0a9cd2b 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2309,20 +2309,20 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	 * Check if Flow Director n-tuple support was enabled or disabled.  If
 	 * the state changed, we need to reset.
 	 */
-	if ((adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) &&
-	    (!(data & ETH_FLAG_NTUPLE))) {
-		/* turn off Flow Director perfect, set hash and reset */
+	if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
+		/* turn off ATR, enable perfect filters and reset */
+		if (data & ETH_FLAG_NTUPLE) {
+			adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+			adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+			need_reset = true;
+		}
+	} else if (!(data & ETH_FLAG_NTUPLE)) {
+		/* turn off Flow Director, set ATR and reset */
 		adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
-		adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
-		need_reset = true;
-	} else if ((!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) &&
-	           (data & ETH_FLAG_NTUPLE)) {
-		/* turn off Flow Director hash, enable perfect and reset */
-		adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
-		adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+		if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
+		    !(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
 		need_reset = true;
-	} else {
-		/* no state change */
 	}
 
 	if (need_reset) {
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 1c2e721..7c929a2 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1554,9 +1554,8 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
 			q_vector->eitr = adapter->rx_eitr_param;
 
 		ixgbe_write_eitr(q_vector);
-		/* If Flow Director is enabled, set interrupt affinity */
-		if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
-		    (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
+		/* If ATR is enabled, set interrupt affinity */
+		if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
 			/*
 			 * Allocate the affinity_hint cpumask, assign the mask
 			 * for this vector, and set our affinity_hint for
@@ -2467,8 +2466,7 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues,
 	default:
 		break;
 	}
-	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE ||
-	    adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)
 		mask |= IXGBE_EIMS_FLOW_DIR;
 
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
@@ -3766,8 +3764,6 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 			adapter->tx_ring[i]->atr_sample_rate =
 						       adapter->atr_sample_rate;
 		ixgbe_init_fdir_signature_82599(hw, adapter->fdir_pballoc);
-	} else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
-		ixgbe_init_fdir_perfect_82599(hw, adapter->fdir_pballoc);
 	}
 	ixgbe_configure_virtualization(adapter);
 
@@ -4333,15 +4329,13 @@ static inline bool ixgbe_set_fdir_queues(struct ixgbe_adapter *adapter)
 	f_fdir->mask = 0;
 
 	/* Flow Director must have RSS enabled */
-	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED &&
-	    ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE ||
-	     (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)))) {
+	if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
+	    (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)) {
 		adapter->num_tx_queues = f_fdir->indices;
 		adapter->num_rx_queues = f_fdir->indices;
 		ret = true;
 	} else {
 		adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
-		adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
 	}
 	return ret;
 }
@@ -4371,12 +4365,12 @@ static inline bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter)
 
 	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
 		e_info(probe, "FCoE enabled with RSS\n");
-		if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
-		    (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+		if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)
 			ixgbe_set_fdir_queues(adapter);
 		else
 			ixgbe_set_rss_queues(adapter);
 	}
+
 	/* adding FCoE rx rings to the end */
 	f->mask = adapter->num_rx_queues;
 	adapter->num_rx_queues += f->indices;
@@ -4669,9 +4663,8 @@ static inline bool ixgbe_cache_ring_fdir(struct ixgbe_adapter *adapter)
 	int i;
 	bool ret = false;
 
-	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED &&
-	    ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
-	     (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))) {
+	if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
+	    (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)) {
 		for (i = 0; i < adapter->num_rx_queues; i++)
 			adapter->rx_ring[i]->reg_idx = i;
 		for (i = 0; i < adapter->num_tx_queues; i++)
@@ -4700,8 +4693,7 @@ static inline bool ixgbe_cache_ring_fcoe(struct ixgbe_adapter *adapter)
 		return false;
 
 	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
-		if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
-		    (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+		if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)
 			ixgbe_cache_ring_fdir(adapter);
 		else
 			ixgbe_cache_ring_rss(adapter);
@@ -4887,14 +4879,12 @@ static int ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
 
 	adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
 	adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
-	if (adapter->flags & (IXGBE_FLAG_FDIR_HASH_CAPABLE |
-			      IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
 		e_err(probe,
-		      "Flow Director is not supported while multiple "
+		      "ATR is not supported while multiple "
 		      "queues are disabled.  Disabling Flow Director\n");
 	}
 	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
-	adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
 	adapter->atr_sample_rate = 0;
 	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
 		ixgbe_disable_sriov(adapter);
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 29/40] ixgbe: add support for displaying ntuple filters via the nfc interface
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This code adds support for displaying the filters that were added via the
nfc interface.  This is primarily to test the interface for now, but I am
also looking into the feasibility of moving all of the ntuple filter code
in ixgbe over to the nfc interface since it seems to be better implemented.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe.h         |   11 ++++
 drivers/net/ixgbe/ixgbe_ethtool.c |  102 +++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 5ea5b4c..d6bfb2f 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -482,6 +482,17 @@ struct ixgbe_adapter {
 	struct vf_macvlans vf_mvs;
 	struct vf_macvlans *mv_list;
 	bool antispoofing_enabled;
+
+	struct hlist_head fdir_filter_list;
+	union ixgbe_atr_input fdir_mask;
+	int fdir_filter_count;
+};
+
+struct ixgbe_fdir_filter {
+	struct hlist_node fdir_node;
+	union ixgbe_atr_input filter;
+	u16 sw_idx;
+	u16 action;
 };
 
 enum ixbge_state_t {
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 477cbd3..2c70363 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2335,6 +2335,97 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	return 0;
 }
 
+static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	union ixgbe_atr_input *mask = &adapter->fdir_mask;
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *rule = NULL;
+
+	/* report total rule count */
+	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+	hlist_for_each_entry_safe(rule, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		if (fsp->location <= rule->sw_idx)
+			break;
+	}
+
+	if (!rule || fsp->location != rule->sw_idx)
+		return -EINVAL;
+
+	/* fill out the flow spec entry */
+
+	/* set flow type field */
+	switch (rule->filter.formatted.flow_type) {
+	case IXGBE_ATR_FLOW_TYPE_TCPV4:
+		fsp->flow_type = TCP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_UDPV4:
+		fsp->flow_type = UDP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+		fsp->flow_type = SCTP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_IPV4:
+		fsp->flow_type = IP_USER_FLOW;
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = 0;
+		fsp->m_u.usr_ip4_spec.proto = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	fsp->h_u.tcp_ip4_spec.psrc = rule->filter.formatted.src_port;
+	fsp->m_u.tcp_ip4_spec.psrc = mask->formatted.src_port;
+	fsp->h_u.tcp_ip4_spec.pdst = rule->filter.formatted.dst_port;
+	fsp->m_u.tcp_ip4_spec.pdst = mask->formatted.dst_port;
+	fsp->h_u.tcp_ip4_spec.ip4src = rule->filter.formatted.src_ip[0];
+	fsp->m_u.tcp_ip4_spec.ip4src = mask->formatted.src_ip[0];
+	fsp->h_u.tcp_ip4_spec.ip4dst = rule->filter.formatted.dst_ip[0];
+	fsp->m_u.tcp_ip4_spec.ip4dst = mask->formatted.dst_ip[0];
+	fsp->h_ext.vlan_tci = rule->filter.formatted.vlan_id;
+	fsp->m_ext.vlan_tci = mask->formatted.vlan_id;
+	fsp->h_ext.vlan_etype = rule->filter.formatted.flex_bytes;
+	fsp->m_ext.vlan_etype = mask->formatted.flex_bytes;
+	fsp->h_ext.data[1] = htonl(rule->filter.formatted.vm_pool);
+	fsp->m_ext.data[1] = htonl(mask->formatted.vm_pool);
+	fsp->flow_type |= FLOW_EXT;
+
+	/* record action */
+	if (rule->action == IXGBE_FDIR_DROP_QUEUE)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->action;
+
+	return 0;
+}
+
+static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter,
+				      struct ethtool_rxnfc *cmd,
+				      u32 *rule_locs)
+{
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *rule;
+	int cnt = 0;
+
+	/* report total rule count */
+	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+	hlist_for_each_entry_safe(rule, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+		rule_locs[cnt] = rule->sw_idx;
+		cnt++;
+	}
+
+	return 0;
+}
+
 static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 			   void *rule_locs)
 {
@@ -2346,6 +2437,17 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 		cmd->data = adapter->num_rx_queues;
 		ret = 0;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = adapter->fdir_filter_count;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = ixgbe_get_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = ixgbe_get_ethtool_fdir_all(adapter, cmd,
+						 (u32 *)rule_locs);
+		break;
 	default:
 		break;
 	}
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 30/40] ixgbe: add support for nfc addition and removal of filters
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change is meant to allow for nfc to insert and remove filters in order
to test the ethtool interface which includes it's own rules manager.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |  245 +++++++++++++++++++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_main.c    |   45 +++++++
 2 files changed, 290 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 2c70363..f37c9b8 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2455,6 +2455,250 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 	return ret;
 }
 
+static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					   struct ixgbe_fdir_filter *input,
+					   u16 sw_idx)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct hlist_node *node, *node2, *parent;
+	struct ixgbe_fdir_filter *rule;
+	int err = -EINVAL;
+
+	parent = NULL;
+	rule = NULL;
+
+	hlist_for_each_entry_safe(rule, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		/* hash found, or no matching entry */
+		if (rule->sw_idx >= sw_idx)
+			break;
+		parent = node;
+	}
+
+	/* if there is an old rule occupying our place remove it */
+	if (rule && (rule->sw_idx == sw_idx)) {
+		if (!input || (rule->filter.formatted.bkt_hash !=
+			       input->filter.formatted.bkt_hash)) {
+			err = ixgbe_fdir_erase_perfect_filter_82599(hw,
+								&rule->filter,
+								sw_idx);
+		}
+
+		hlist_del(&rule->fdir_node);
+		kfree(rule);
+		adapter->fdir_filter_count--;
+	}
+
+	/*
+	 * If no input this was a delete, err should be 0 if a rule was
+	 * successfully found and removed from the list else -EINVAL
+	 */
+	if (!input)
+		return err;
+
+	/* initialize node and set software index */
+	INIT_HLIST_NODE(&input->fdir_node);
+
+	/* add filter to the list */
+	if (parent)
+		hlist_add_after(parent, &input->fdir_node);
+	else
+		hlist_add_head(&input->fdir_node,
+			       &adapter->fdir_filter_list);
+
+	/* update counts */
+	adapter->fdir_filter_count++;
+
+	return 0;
+}
+
+static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp,
+				       u8 *flow_type)
+{
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+		break;
+	case UDP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+		break;
+	case SCTP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+		break;
+	case IP_USER_FLOW:
+		switch (fsp->h_u.usr_ip4_spec.proto) {
+		case IPPROTO_TCP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+			break;
+		case IPPROTO_UDP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+			break;
+		case IPPROTO_SCTP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+			break;
+		case 0:
+			if (!fsp->m_u.usr_ip4_spec.proto) {
+				*flow_type = IXGBE_ATR_FLOW_TYPE_IPV4;
+				break;
+			}
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_fdir_filter *input;
+	union ixgbe_atr_input mask;
+	int err;
+
+	if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Don't allow programming if the action is a queue greater than
+	 * the number of online Rx queues.
+	 */
+	if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
+	    (fsp->ring_cookie >= adapter->num_rx_queues))
+		return -EINVAL;
+
+	/* Don't allow indexes to exist outside of available space */
+	if (fsp->location >= ((1024 << adapter->fdir_pballoc) - 2)) {
+		e_err(drv, "Location out of range\n");
+		return -EINVAL;
+	}
+
+	input = kzalloc(sizeof(*input), GFP_ATOMIC);
+	if (!input)
+		return -ENOMEM;
+
+	memset(&mask, 0, sizeof(union ixgbe_atr_input));
+
+	/* set SW index */
+	input->sw_idx = fsp->location;
+
+	/* record flow type */
+	if (!ixgbe_flowspec_to_flow_type(fsp,
+					 &input->filter.formatted.flow_type)) {
+		e_err(drv, "Unrecognized flow type\n");
+		goto err_out;
+	}
+
+	mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
+				   IXGBE_ATR_L4TYPE_MASK;
+
+	if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
+		mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;
+
+	/* Copy input into formatted structures */
+	input->filter.formatted.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+	mask.formatted.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
+	input->filter.formatted.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+	mask.formatted.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
+	input->filter.formatted.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+	mask.formatted.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+	input->filter.formatted.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+	mask.formatted.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+
+	if (fsp->flow_type & FLOW_EXT) {
+		input->filter.formatted.vm_pool =
+				(unsigned char)ntohl(fsp->h_ext.data[1]);
+		mask.formatted.vm_pool =
+				(unsigned char)ntohl(fsp->m_ext.data[1]);
+		input->filter.formatted.vlan_id = fsp->h_ext.vlan_tci;
+		mask.formatted.vlan_id = fsp->m_ext.vlan_tci;
+		input->filter.formatted.flex_bytes =
+						fsp->h_ext.vlan_etype;
+		mask.formatted.flex_bytes = fsp->m_ext.vlan_etype;
+	}
+
+	/* determine if we need to drop or route the packet */
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
+		input->action = IXGBE_FDIR_DROP_QUEUE;
+	else
+		input->action = fsp->ring_cookie;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	if (hlist_empty(&adapter->fdir_filter_list)) {
+		/* save mask and program input mask into HW */
+		memcpy(&adapter->fdir_mask, &mask, sizeof(mask));
+		err = ixgbe_fdir_set_input_mask_82599(hw, &mask);
+		if (err) {
+			e_err(drv, "Error writing mask\n");
+			goto err_out_w_lock;
+		}
+	} else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) {
+		e_err(drv, "Only one mask supported per port\n");
+		goto err_out_w_lock;
+	}
+
+	/* apply mask and compute/store hash */
+	ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
+
+	/* program filters to filter memory */
+	err = ixgbe_fdir_write_perfect_filter_82599(hw,
+				&input->filter, input->sw_idx,
+				adapter->rx_ring[input->action]->reg_idx);
+	if (err)
+		goto err_out_w_lock;
+
+	ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+
+	return err;
+err_out_w_lock:
+	spin_unlock(&adapter->fdir_perfect_lock);
+err_out:
+	kfree(input);
+	return -EINVAL;
+}
+
+static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	int err;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+	err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, fsp->location);
+	spin_unlock(&adapter->fdir_perfect_lock);
+
+	return err;
+}
+
+static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		ret = ixgbe_add_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_settings           = ixgbe_get_settings,
 	.set_settings           = ixgbe_set_settings,
@@ -2491,6 +2735,7 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_flags              = ethtool_op_get_flags,
 	.set_flags              = ixgbe_set_flags,
 	.get_rxnfc		= ixgbe_get_rxnfc,
+	.set_rxnfc		= ixgbe_set_rxnfc,
 };
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index bfeed84..472b2c0 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3740,6 +3740,28 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter)
 	hw->mac.ops.set_rxpba(&adapter->hw, num_tc, hdrm, PBA_STRATEGY_EQUAL);
 }
 
+static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *filter;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	if (!hlist_empty(&adapter->fdir_filter_list))
+		ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask);
+
+	hlist_for_each_entry_safe(filter, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		ixgbe_fdir_write_perfect_filter_82599(hw,
+						      &filter->filter,
+						      filter->sw_idx,
+						      filter->action);
+	}
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+}
+
 static void ixgbe_configure(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
@@ -3764,6 +3786,10 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 			adapter->tx_ring[i]->atr_sample_rate =
 						       adapter->atr_sample_rate;
 		ixgbe_init_fdir_signature_82599(hw, adapter->fdir_pballoc);
+	} else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
+		ixgbe_init_fdir_perfect_82599(&adapter->hw,
+					      adapter->fdir_pballoc);
+		ixgbe_fdir_filter_restore(adapter);
 	}
 	ixgbe_configure_virtualization(adapter);
 
@@ -4140,6 +4166,23 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter)
 		ixgbe_clean_tx_ring(adapter->tx_ring[i]);
 }
 
+static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter)
+{
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *filter;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	hlist_for_each_entry_safe(filter, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		hlist_del(&filter->fdir_node);
+		kfree(filter);
+	}
+	adapter->fdir_filter_count = 0;
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+}
+
 void ixgbe_down(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
@@ -5532,6 +5575,8 @@ static int ixgbe_close(struct net_device *netdev)
 	ixgbe_down(adapter);
 	ixgbe_free_irq(adapter);
 
+	ixgbe_fdir_filter_exit(adapter);
+
 	ixgbe_free_all_tx_resources(adapter);
 	ixgbe_free_all_rx_resources(adapter);
 
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 31/40] ethtool: remove support for ETHTOOL_GRXNTUPLE
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change is meant to remove all support for displaying an ntuple as
strings via ETHTOOL_GRXNTUPLE.  The reason for this change is due to the
fact that multiple issues have been found including:
 - Multiple buffer overruns for strings being displayed.
 - Incorrect filters displayed, cleared filters with ring of -2 are displayed
 - Setting get_rx_ntuple displays no rules if defined.
 - Endianess wrong on displayed values.
 - Hard limit of 1024 filters makes display functionality extremely limited

The only driver that had supported this interface was ixgbe.  Since it no
longer uses the interface and due to the issues mentioned above I am
submitting this patch to remove it.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/ethtool.h   |    8 +-
 include/linux/netdevice.h |    3 -
 net/core/dev.c            |    5 -
 net/core/ethtool.c        |  299 ---------------------------------------------
 4 files changed, 2 insertions(+), 313 deletions(-)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index c6a850a..3310ab6 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -287,7 +287,7 @@ enum ethtool_stringset {
 	ETH_SS_TEST		= 0,
 	ETH_SS_STATS,
 	ETH_SS_PRIV_FLAGS,
-	ETH_SS_NTUPLE_FILTERS,
+	ETH_SS_DO_NOT_USE,		/* was ETH_SS_NTUPLE_FILTERS */
 	ETH_SS_FEATURES,
 };
 
@@ -720,8 +720,6 @@ struct ethtool_rx_ntuple_flow_spec_container {
 };
 
 struct ethtool_rx_ntuple_list {
-#define ETHTOOL_MAX_NTUPLE_LIST_ENTRY 1024
-#define ETHTOOL_MAX_NTUPLE_STRING_PER_ENTRY 14
 	struct list_head	list;
 	unsigned int		count;
 };
@@ -758,7 +756,6 @@ u32 ethtool_op_get_ufo(struct net_device *dev);
 int ethtool_op_set_ufo(struct net_device *dev, u32 data);
 u32 ethtool_op_get_flags(struct net_device *dev);
 int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported);
-void ethtool_ntuple_flush(struct net_device *dev);
 bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported);
 
 /**
@@ -944,7 +941,6 @@ struct ethtool_ops {
 	int	(*reset)(struct net_device *, u32 *);
 	int	(*set_rx_ntuple)(struct net_device *,
 				 struct ethtool_rx_ntuple *);
-	int	(*get_rx_ntuple)(struct net_device *, u32 stringset, void *);
 	int	(*get_rxfh_indir)(struct net_device *,
 				  struct ethtool_rxfh_indir *);
 	int	(*set_rxfh_indir)(struct net_device *,
@@ -1017,7 +1013,7 @@ struct ethtool_ops {
 #define ETHTOOL_FLASHDEV	0x00000033 /* Flash firmware to device */
 #define ETHTOOL_RESET		0x00000034 /* Reset hardware */
 #define ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
-#define ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
+/* ETHTOOL_GRXNTUPLE		0x00000036 disabled due to multiple issues */
 #define ETHTOOL_GSSET_INFO	0x00000037 /* Get string set info */
 #define ETHTOOL_GRXFHINDIR	0x00000038 /* Get RX flow hash indir'n table */
 #define ETHTOOL_SRXFHINDIR	0x00000039 /* Set RX flow hash indir'n table */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ca333e7..f51eef8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1348,9 +1348,6 @@ struct net_device {
 	/* max exchange id for FCoE LRO by ddp */
 	unsigned int		fcoe_ddp_xid;
 #endif
-	/* n-tuple filter list attached to this device */
-	struct ethtool_rx_ntuple_list ethtool_ntuple_list;
-
 	/* phy device may attach itself for hardware timestamping */
 	struct phy_device *phydev;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 9393078..b3f52d2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5867,8 +5867,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 
-	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
-	dev->ethtool_ntuple_list.count = 0;
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
@@ -5932,9 +5930,6 @@ void free_netdev(struct net_device *dev)
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
-	/* Clear ethtool n-tuple list */
-	ethtool_ntuple_flush(dev);
-
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index fd14116..9fa7270 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -169,18 +169,6 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
 }
 EXPORT_SYMBOL(ethtool_op_set_flags);
 
-void ethtool_ntuple_flush(struct net_device *dev)
-{
-	struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
-
-	list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
-		list_del(&fsc->list);
-		kfree(fsc);
-	}
-	dev->ethtool_ntuple_list.count = 0;
-}
-EXPORT_SYMBOL(ethtool_ntuple_flush);
-
 /* Handlers for each ethtool command */
 
 #define ETHTOOL_DEV_FEATURE_WORDS	1
@@ -865,34 +853,6 @@ out:
 	return ret;
 }
 
-static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
-			struct ethtool_rx_ntuple_flow_spec *spec,
-			struct ethtool_rx_ntuple_flow_spec_container *fsc)
-{
-
-	/* don't add filters forever */
-	if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) {
-		/* free the container */
-		kfree(fsc);
-		return;
-	}
-
-	/* Copy the whole filter over */
-	fsc->fs.flow_type = spec->flow_type;
-	memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
-	memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
-
-	fsc->fs.vlan_tag = spec->vlan_tag;
-	fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
-	fsc->fs.data = spec->data;
-	fsc->fs.data_mask = spec->data_mask;
-	fsc->fs.action = spec->action;
-
-	/* add to the list */
-	list_add_tail_rcu(&fsc->list, &list->list);
-	list->count++;
-}
-
 /*
  * ethtool does not (or did not) set masks for flow parameters that are
  * not specified, so if both value and mask are 0 then this must be
@@ -944,268 +904,12 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
 
 	rx_ntuple_fix_masks(&cmd.fs);
 
-	/*
-	 * Cache filter in dev struct for GET operation only if
-	 * the underlying driver doesn't have its own GET operation, and
-	 * only if the filter was added successfully.  First make sure we
-	 * can allocate the filter, then continue if successful.
-	 */
-	if (!ops->get_rx_ntuple) {
-		fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC);
-		if (!fsc)
-			return -ENOMEM;
-	}
-
 	ret = ops->set_rx_ntuple(dev, &cmd);
 	if (ret) {
 		kfree(fsc);
 		return ret;
 	}
 
-	if (!ops->get_rx_ntuple)
-		__rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc);
-
-	return ret;
-}
-
-static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
-{
-	struct ethtool_gstrings gstrings;
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-	struct ethtool_rx_ntuple_flow_spec_container *fsc;
-	u8 *data;
-	char *p;
-	int ret, i, num_strings = 0;
-
-	if (!ops->get_sset_count)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
-		return -EFAULT;
-
-	ret = ops->get_sset_count(dev, gstrings.string_set);
-	if (ret < 0)
-		return ret;
-
-	gstrings.len = ret;
-
-	data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	if (ops->get_rx_ntuple) {
-		/* driver-specific filter grab */
-		ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
-		goto copy;
-	}
-
-	/* default ethtool filter grab */
-	i = 0;
-	p = (char *)data;
-	list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
-		sprintf(p, "Filter %d:\n", i);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-
-		switch (fsc->fs.flow_type) {
-		case TCP_V4_FLOW:
-			sprintf(p, "\tFlow Type: TCP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case UDP_V4_FLOW:
-			sprintf(p, "\tFlow Type: UDP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case SCTP_V4_FLOW:
-			sprintf(p, "\tFlow Type: SCTP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case AH_ESP_V4_FLOW:
-			sprintf(p, "\tFlow Type: AH ESP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case ESP_V4_FLOW:
-			sprintf(p, "\tFlow Type: ESP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case IP_USER_FLOW:
-			sprintf(p, "\tFlow Type: Raw IP\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case IPV4_FLOW:
-			sprintf(p, "\tFlow Type: IPv4\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		default:
-			sprintf(p, "\tFlow Type: Unknown\n");
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			goto unknown_filter;
-		}
-
-		/* now the rest of the filters */
-		switch (fsc->fs.flow_type) {
-		case TCP_V4_FLOW:
-		case UDP_V4_FLOW:
-		case SCTP_V4_FLOW:
-			sprintf(p, "\tSrc IP addr: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc IP mask: 0x%x\n",
-				fsc->fs.m_u.tcp_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP addr: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP mask: 0x%x\n",
-				fsc->fs.m_u.tcp_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.psrc,
-				fsc->fs.m_u.tcp_ip4_spec.psrc);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.pdst,
-				fsc->fs.m_u.tcp_ip4_spec.pdst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-				fsc->fs.h_u.tcp_ip4_spec.tos,
-				fsc->fs.m_u.tcp_ip4_spec.tos);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case AH_ESP_V4_FLOW:
-		case ESP_V4_FLOW:
-			sprintf(p, "\tSrc IP addr: 0x%x\n",
-				fsc->fs.h_u.ah_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc IP mask: 0x%x\n",
-				fsc->fs.m_u.ah_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP addr: 0x%x\n",
-				fsc->fs.h_u.ah_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP mask: 0x%x\n",
-				fsc->fs.m_u.ah_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSPI: %d, mask: 0x%x\n",
-				fsc->fs.h_u.ah_ip4_spec.spi,
-				fsc->fs.m_u.ah_ip4_spec.spi);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-				fsc->fs.h_u.ah_ip4_spec.tos,
-				fsc->fs.m_u.ah_ip4_spec.tos);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case IP_USER_FLOW:
-			sprintf(p, "\tSrc IP addr: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc IP mask: 0x%x\n",
-				fsc->fs.m_u.usr_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP addr: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP mask: 0x%x\n",
-				fsc->fs.m_u.usr_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		case IPV4_FLOW:
-			sprintf(p, "\tSrc IP addr: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tSrc IP mask: 0x%x\n",
-				fsc->fs.m_u.usr_ip4_spec.ip4src);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP addr: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tDest IP mask: 0x%x\n",
-				fsc->fs.m_u.usr_ip4_spec.ip4dst);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
-				fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.tos,
-				fsc->fs.m_u.usr_ip4_spec.tos);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.ip_ver,
-				fsc->fs.m_u.usr_ip4_spec.ip_ver);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
-				fsc->fs.h_u.usr_ip4_spec.proto,
-				fsc->fs.m_u.usr_ip4_spec.proto);
-			p += ETH_GSTRING_LEN;
-			num_strings++;
-			break;
-		}
-		sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
-			fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-		sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-		sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-		if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
-			sprintf(p, "\tAction: Drop\n");
-		else
-			sprintf(p, "\tAction: Direct to queue %d\n",
-				fsc->fs.action);
-		p += ETH_GSTRING_LEN;
-		num_strings++;
-unknown_filter:
-		i++;
-	}
-copy:
-	/* indicate to userspace how many strings we actually have */
-	gstrings.len = num_strings;
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
-		goto out;
-	useraddr += sizeof(gstrings);
-	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(data);
 	return ret;
 }
 
@@ -2101,9 +1805,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXNTUPLE:
 		rc = ethtool_set_rx_ntuple(dev, useraddr);
 		break;
-	case ETHTOOL_GRXNTUPLE:
-		rc = ethtool_get_rx_ntuple(dev, useraddr);
-		break;
 	case ETHTOOL_GSSET_INFO:
 		rc = ethtool_get_sset_info(dev, useraddr);
 		break;
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 32/40] ixgbe: add support for modifying UDP RSS flow hash options
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This patch adds the ability to add or remove the UDP source and destination
ports from the flow hash generated for RSS.  Currently the UDP flow hash is
always disabled.  By adding support for enabling the UDP flow hash we are
now providing the option of generating an RSS hash based on the UDP source
and destination port numbers.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe.h         |    2 +
 drivers/net/ixgbe/ixgbe_ethtool.c |  152 +++++++++++++++++++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_main.c    |    5 +
 3 files changed, 159 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index d6bfb2f..1d80d77 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -382,6 +382,8 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG2_SFP_NEEDS_RESET             (u32)(1 << 5)
 #define IXGBE_FLAG2_RESET_REQUESTED             (u32)(1 << 6)
 #define IXGBE_FLAG2_FDIR_REQUIRES_REINIT        (u32)(1 << 7)
+#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP          (u32)(1 << 8)
+#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP          (u32)(1 << 9)
 
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	u16 bd_number;
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index f37c9b8..552cb62 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2426,6 +2426,48 @@ static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter,
 	return 0;
 }
 
+static int ixgbe_get_rss_hash_opts(struct ixgbe_adapter *adapter,
+				   struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+
+	/* if RSS is disabled then report no hashing */
+	if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
+		return 0;
+
+	/* Report default options for RSS on ixgbe */
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case UDP_V4_FLOW:
+		if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case TCP_V6_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case UDP_V6_FLOW:
+		if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 			   void *rule_locs)
 {
@@ -2448,6 +2490,9 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 		ret = ixgbe_get_ethtool_fdir_all(adapter, cmd,
 						 (u32 *)rule_locs);
 		break;
+	case ETHTOOL_GRXFH:
+		ret = ixgbe_get_rss_hash_opts(adapter, cmd);
+		break;
 	default:
 		break;
 	}
@@ -2680,6 +2725,110 @@ static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 	return err;
 }
 
+#define UDP_RSS_FLAGS (IXGBE_FLAG2_RSS_FIELD_IPV4_UDP | \
+		       IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter,
+				  struct ethtool_rxnfc *nfc)
+{
+	u32 flags2 = adapter->flags2;
+
+	/*
+	 * RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    !(nfc->data & RXH_L4_B_0_1) ||
+		    !(nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	case UDP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* if we changed something we need to update flags */
+	if (flags2 != adapter->flags2) {
+		struct ixgbe_hw *hw = &adapter->hw;
+		u32 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+
+		if ((flags2 & UDP_RSS_FLAGS) &&
+		    !(adapter->flags2 & UDP_RSS_FLAGS))
+			netdev_warn(adapter->netdev,
+				"enabling UDP RSS: fragmented packets may "
+				"arrive out of order to the stack above\n");
+
+		adapter->flags2 = flags2;
+
+		/* Perform hash on these packet types */
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4
+		      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
+		      | IXGBE_MRQC_RSS_FIELD_IPV6
+		      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
+
+		mrqc &= ~(IXGBE_MRQC_RSS_FIELD_IPV4_UDP |
+			  IXGBE_MRQC_RSS_FIELD_IPV6_UDP);
+
+		if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+			mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
+
+		if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+			mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+	}
+
+	return 0;
+}
+
 static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
@@ -2692,6 +2841,9 @@ static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 	case ETHTOOL_SRXCLSRLDEL:
 		ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd);
 		break;
+	case ETHTOOL_SRXFH:
+		ret = ixgbe_set_rss_hash_opt(adapter, cmd);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 472b2c0..c0deed8 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2935,6 +2935,11 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
 	      | IXGBE_MRQC_RSS_FIELD_IPV6
 	      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
 
+	if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
+	if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
+
 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
 }
 
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 35/40] ixgbe: disable RSC when Rx checksum is off
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Disabling Rx checksumming leads to performance degradation due to
RSC causing packets to have incorrect checksums.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |   44 +++++++++++++++++++++++++++---------
 1 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 3dd0f82..f96193d 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -457,17 +457,6 @@ static u32 ixgbe_get_rx_csum(struct net_device *netdev)
 	return adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED;
 }
 
-static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data)
-{
-	struct ixgbe_adapter *adapter = netdev_priv(netdev);
-	if (data)
-		adapter->flags |= IXGBE_FLAG_RX_CSUM_ENABLED;
-	else
-		adapter->flags &= ~IXGBE_FLAG_RX_CSUM_ENABLED;
-
-	return 0;
-}
-
 static void ixgbe_set_rsc(struct ixgbe_adapter *adapter)
 {
 	int i;
@@ -483,6 +472,39 @@ static void ixgbe_set_rsc(struct ixgbe_adapter *adapter)
 	}
 }
 
+static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	bool need_reset = false;
+
+	if (data) {
+		adapter->flags |= IXGBE_FLAG_RX_CSUM_ENABLED;
+	} else {
+		adapter->flags &= ~IXGBE_FLAG_RX_CSUM_ENABLED;
+
+		if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) {
+			adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+			netdev->features &= ~NETIF_F_LRO;
+		}
+
+		switch (adapter->hw.mac.type) {
+		case ixgbe_mac_X540:
+			ixgbe_set_rsc(adapter);
+			break;
+		case ixgbe_mac_82599EB:
+			need_reset = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (need_reset)
+		ixgbe_do_reset(netdev);
+
+	return 0;
+}
+
 static u32 ixgbe_get_tx_csum(struct net_device *netdev)
 {
 	return (netdev->features & NETIF_F_IP_CSUM) != 0;
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 33/40] ixgbe: move setting RSC into a separate function
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Move setting RSC into a separate function
to allow for reuse in other parts of the code.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |   34 ++++++++++++++++++----------------
 1 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 552cb62..85a77ce 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -458,6 +458,21 @@ static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data)
 	return 0;
 }
 
+static void ixgbe_set_rsc(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbe_ring *ring = adapter->rx_ring[i];
+		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+			set_ring_rsc_enabled(ring);
+			ixgbe_configure_rscctl(adapter, ring);
+		} else {
+			ixgbe_clear_rscctl(adapter, ring);
+		}
+	}
+}
+
 static u32 ixgbe_get_tx_csum(struct net_device *netdev)
 {
 	return (netdev->features & NETIF_F_IP_CSUM) != 0;
@@ -2280,25 +2295,12 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 		} else {
 			adapter->flags2 ^= IXGBE_FLAG2_RSC_ENABLED;
 			switch (adapter->hw.mac.type) {
+			case ixgbe_mac_X540:
+				ixgbe_set_rsc(adapter);
+				break;
 			case ixgbe_mac_82599EB:
 				need_reset = true;
 				break;
-			case ixgbe_mac_X540: {
-				int i;
-				for (i = 0; i < adapter->num_rx_queues; i++) {
-					struct ixgbe_ring *ring =
-					                  adapter->rx_ring[i];
-					if (adapter->flags2 &
-					    IXGBE_FLAG2_RSC_ENABLED) {
-						ixgbe_configure_rscctl(adapter,
-						                       ring);
-					} else {
-						ixgbe_clear_rscctl(adapter,
-						                   ring);
-					}
-				}
-			}
-				break;
 			default:
 				break;
 			}
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 34/40] ixgbe: move reset code into a separate function
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Move reset code into a separate function
to allow for reuse in other parts of the code.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |   26 ++++++++++++++------------
 1 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 85a77ce..3dd0f82 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -441,6 +441,16 @@ static int ixgbe_set_pauseparam(struct net_device *netdev,
 	return 0;
 }
 
+static void ixgbe_do_reset(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		ixgbe_reinit_locked(adapter);
+	else
+		ixgbe_reset(adapter);
+}
+
 static u32 ixgbe_get_rx_csum(struct net_device *netdev)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
@@ -2248,12 +2258,8 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
 	 * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings
 	 * also locks in RSC enable/disable which requires reset
 	 */
-	if (need_reset) {
-		if (netif_running(netdev))
-			ixgbe_reinit_locked(adapter);
-		else
-			ixgbe_reset(adapter);
-	}
+	if (need_reset)
+		ixgbe_do_reset(netdev);
 
 	return 0;
 }
@@ -2327,12 +2333,8 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 		need_reset = true;
 	}
 
-	if (need_reset) {
-		if (netif_running(netdev))
-			ixgbe_reinit_locked(adapter);
-		else
-			ixgbe_reset(adapter);
-	}
+	if (need_reset)
+		ixgbe_do_reset(netdev);
 
 	return 0;
 }
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 36/40] ixgbe: fix ring assignment issues for SR-IOV and drop cases
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change fixes the fact that we would trigger a null pointer dereference
or specify the wrong ring if the rings were restored.  This change makes
certain that the DROP queue is a static value, and all other rings are
based on the ring offsets for the PF.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |    2 ++
 drivers/net/ixgbe/ixgbe_main.c    |    8 +++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index f96193d..74a53ca 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2721,6 +2721,8 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 	/* program filters to filter memory */
 	err = ixgbe_fdir_write_perfect_filter_82599(hw,
 				&input->filter, input->sw_idx,
+				(input->action == IXGBE_FDIR_DROP_QUEUE) ?
+				IXGBE_FDIR_DROP_QUEUE :
 				adapter->rx_ring[input->action]->reg_idx);
 	if (err)
 		goto err_out_w_lock;
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index c0deed8..a5425e8 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3759,9 +3759,11 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
 	hlist_for_each_entry_safe(filter, node, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		ixgbe_fdir_write_perfect_filter_82599(hw,
-						      &filter->filter,
-						      filter->sw_idx,
-						      filter->action);
+				&filter->filter,
+				filter->sw_idx,
+				(filter->action == IXGBE_FDIR_DROP_QUEUE) ?
+				IXGBE_FDIR_DROP_QUEUE :
+				adapter->rx_ring[filter->action]->reg_idx);
 	}
 
 	spin_unlock(&adapter->fdir_perfect_lock);
-- 
1.7.5.2


^ permalink raw reply related

* [net-next 37/40] rtnetlink: Compute and store minimum ifinfo dump size
From: Jeff Kirsher @ 2011-06-07 12:33 UTC (permalink / raw)
  To: davem; +Cc: Greg Rose, netdev, gospo, Jeff Kirsher
In-Reply-To: <1307449995-9458-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Greg Rose <gregory.v.rose@intel.com>

The message size allocated for rtnl ifinfo dumps was limited to
a single page.  This is not enough for additional interface info
available with devices that support SR-IOV and caused a bug in
which VF info would not be displayed if more than approximately
40 VFs were created per interface.

Implement a new function pointer for the rtnl_register service that will
calculate the amount of data required for the ifinfo dump and allocate
enough data to satisfy the request.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/netlink.h              |    6 ++-
 include/net/rtnetlink.h              |    7 +++-
 net/bridge/br_netlink.c              |   15 ++++++---
 net/core/fib_rules.c                 |    6 ++--
 net/core/neighbour.c                 |   11 +++---
 net/core/rtnetlink.c                 |   60 +++++++++++++++++++++++++++------
 net/dcb/dcbnl.c                      |    4 +-
 net/decnet/dn_dev.c                  |    6 ++--
 net/decnet/dn_fib.c                  |    4 +-
 net/decnet/dn_route.c                |    5 ++-
 net/ipv4/devinet.c                   |    6 ++--
 net/ipv4/fib_frontend.c              |    6 ++--
 net/ipv4/inet_diag.c                 |    2 +-
 net/ipv4/ipmr.c                      |    3 +-
 net/ipv4/route.c                     |    2 +-
 net/ipv6/addrconf.c                  |   16 ++++++---
 net/ipv6/addrlabel.c                 |    9 +++--
 net/ipv6/ip6_fib.c                   |    3 +-
 net/ipv6/ip6mr.c                     |    3 +-
 net/ipv6/route.c                     |    6 ++--
 net/netfilter/ipset/ip_set_core.c    |    2 +-
 net/netfilter/nf_conntrack_netlink.c |    4 +-
 net/netlink/af_netlink.c             |   17 ++++++---
 net/netlink/genetlink.c              |    2 +-
 net/phonet/pn_netlink.c              |   13 ++++---
 net/sched/act_api.c                  |    7 ++--
 net/sched/cls_api.c                  |    6 ++--
 net/sched/sch_api.c                  |   12 +++---
 net/xfrm/xfrm_user.c                 |    3 +-
 29 files changed, 157 insertions(+), 89 deletions(-)

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index a9dd895..fdd0188 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -221,7 +221,8 @@ struct netlink_callback {
 	int			(*dump)(struct sk_buff * skb,
 					struct netlink_callback *cb);
 	int			(*done)(struct netlink_callback *cb);
-	int			family;
+	u16			family;
+	u16			min_dump_alloc;
 	long			args[6];
 };
 
@@ -259,7 +260,8 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
 			      int (*dump)(struct sk_buff *skb, struct netlink_callback*),
-			      int (*done)(struct netlink_callback*));
+			      int (*done)(struct netlink_callback*),
+			      u16 min_dump_alloc);
 
 
 #define NL_NONROOT_RECV 0x1
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 4093ca7..678f1ff 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -6,11 +6,14 @@
 
 typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *);
 typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
+typedef u16 (*rtnl_calcit_func)(struct sk_buff *);
 
 extern int	__rtnl_register(int protocol, int msgtype,
-				rtnl_doit_func, rtnl_dumpit_func);
+				rtnl_doit_func, rtnl_dumpit_func,
+				rtnl_calcit_func);
 extern void	rtnl_register(int protocol, int msgtype,
-			      rtnl_doit_func, rtnl_dumpit_func);
+			      rtnl_doit_func, rtnl_dumpit_func,
+			      rtnl_calcit_func);
 extern int	rtnl_unregister(int protocol, int msgtype);
 extern void	rtnl_unregister_all(int protocol);
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ffb0dc4..6814083 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -218,19 +218,24 @@ int __init br_netlink_init(void)
 	if (err < 0)
 		goto err1;
 
-	err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo);
+	err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL,
+			      br_dump_ifinfo, NULL);
 	if (err)
 		goto err2;
-	err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL);
+	err = __rtnl_register(PF_BRIDGE, RTM_SETLINK,
+			      br_rtm_setlink, NULL, NULL);
 	if (err)
 		goto err3;
-	err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, br_fdb_add, NULL);
+	err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH,
+			      br_fdb_add, NULL, NULL);
 	if (err)
 		goto err3;
-	err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH, br_fdb_delete, NULL);
+	err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH,
+			      br_fdb_delete, NULL, NULL);
 	if (err)
 		goto err3;
-	err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, br_fdb_dump);
+	err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH,
+			      NULL, br_fdb_dump, NULL);
 	if (err)
 		goto err3;
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 008dc70..e7ab0c0 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -740,9 +740,9 @@ static struct pernet_operations fib_rules_net_ops = {
 static int __init fib_rules_init(void)
 {
 	int err;
-	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
 
 	err = register_pernet_subsys(&fib_rules_net_ops);
 	if (err < 0)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 799f06e..ceb505b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2909,12 +2909,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
 
 static int __init neigh_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
+	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
 
-	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
-	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
+		      NULL);
+	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
 
 	return 0;
 }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index abd936d..a798fc6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -56,9 +56,11 @@
 struct rtnl_link {
 	rtnl_doit_func		doit;
 	rtnl_dumpit_func	dumpit;
+	rtnl_calcit_func 	calcit;
 };
 
 static DEFINE_MUTEX(rtnl_mutex);
+static u16 min_ifinfo_dump_size;
 
 void rtnl_lock(void)
 {
@@ -144,12 +146,28 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 	return tab ? tab[msgindex].dumpit : NULL;
 }
 
+static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
+{
+	struct rtnl_link *tab;
+
+	if (protocol <= RTNL_FAMILY_MAX)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
+	if (tab == NULL || tab[msgindex].calcit == NULL)
+		tab = rtnl_msg_handlers[PF_UNSPEC];
+
+	return tab ? tab[msgindex].calcit : NULL;
+}
+
 /**
  * __rtnl_register - Register a rtnetlink message type
  * @protocol: Protocol family or PF_UNSPEC
  * @msgtype: rtnetlink message type
  * @doit: Function pointer called for each request message
  * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @calcit: Function pointer to calc size of dump message
  *
  * Registers the specified function pointers (at least one of them has
  * to be non-NULL) to be called whenever a request message for the
@@ -162,7 +180,8 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
  * Returns 0 on success or a negative error code.
  */
 int __rtnl_register(int protocol, int msgtype,
-		    rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+		    rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+		    rtnl_calcit_func calcit)
 {
 	struct rtnl_link *tab;
 	int msgindex;
@@ -185,6 +204,9 @@ int __rtnl_register(int protocol, int msgtype,
 	if (dumpit)
 		tab[msgindex].dumpit = dumpit;
 
+	if (calcit)
+		tab[msgindex].calcit = calcit;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__rtnl_register);
@@ -199,9 +221,10 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
  * of memory implies no sense in continuing.
  */
 void rtnl_register(int protocol, int msgtype,
-		   rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+		   rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+		   rtnl_calcit_func calcit)
 {
-	if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
+	if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0)
 		panic("Unable to register rtnetlink message handler, "
 		      "protocol = %d, message type = %d\n",
 		      protocol, msgtype);
@@ -1818,6 +1841,11 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	return err;
 }
 
+static u16 rtnl_calcit(struct sk_buff *skb)
+{
+	return min_ifinfo_dump_size;
+}
+
 static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
@@ -1847,11 +1875,14 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
+	size_t if_info_size;
 
-	skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
+	min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size);
+
 	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
@@ -1902,14 +1933,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
 		struct sock *rtnl;
 		rtnl_dumpit_func dumpit;
+		rtnl_calcit_func calcit;
+		u16 min_dump_alloc = 0;
 
 		dumpit = rtnl_get_dumpit(family, type);
 		if (dumpit == NULL)
 			return -EOPNOTSUPP;
+		calcit = rtnl_get_calcit(family, type);
+		if (calcit)
+			min_dump_alloc = calcit(skb);
 
 		__rtnl_unlock();
 		rtnl = net->rtnl;
-		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+		err = netlink_dump_start(rtnl, skb, nlh, dumpit,
+					 NULL, min_dump_alloc);
 		rtnl_lock();
 		return err;
 	}
@@ -2019,12 +2056,13 @@ void __init rtnetlink_init(void)
 	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
 	register_netdevice_notifier(&rtnetlink_dev_notifier);
 
-	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
-	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
-	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
+		      rtnl_dump_ifinfo, rtnl_calcit);
+	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL);
 
-	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
-	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
+	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
 }
 
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 3609eac..ed1bb8c 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1819,8 +1819,8 @@ static int __init dcbnl_init(void)
 {
 	INIT_LIST_HEAD(&dcb_app_list);
 
-	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL);
-	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, NULL);
 
 	return 0;
 }
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index cf26ac7..3780fd6 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1414,9 +1414,9 @@ void __init dn_dev_init(void)
 
 	dn_dev_devices_on();
 
-	rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL);
-	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL);
-	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr);
+	rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, NULL);
+	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL);
+	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL);
 
 	proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops);
 
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 1c74ed3..104324d 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -763,8 +763,8 @@ void __init dn_fib_init(void)
 
 	register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
 
-	rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL);
-	rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL);
+	rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, NULL);
+	rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, NULL);
 }
 
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 74544bc..2949ca4 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1841,10 +1841,11 @@ void __init dn_route_init(void)
 	proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
 
 #ifdef CONFIG_DECNET_ROUTER
-	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump);
+	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
+		      dn_fib_dump, NULL);
 #else
 	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
-		      dn_cache_dump);
+		      dn_cache_dump, NULL);
 #endif
 }
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 0d4a184..37b3c18 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1833,8 +1833,8 @@ void __init devinet_init(void)
 
 	rtnl_af_register(&inet_af_ops);
 
-	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
-	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
-	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
+	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
+	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
 }
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 2252471..92fc5f6 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1124,9 +1124,9 @@ static struct pernet_operations fib_net_ops = {
 
 void __init ip_fib_init(void)
 {
-	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
-	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
-	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
+	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
 
 	register_pernet_subsys(&fib_net_ops);
 	register_netdevice_notifier(&fib_netdev_notifier);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 6ffe94c..5ff4765 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -871,7 +871,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 
 		return netlink_dump_start(idiagnl, skb, nlh,
-					  inet_diag_dump, NULL);
+					  inet_diag_dump, NULL, 0);
 	}
 
 	return inet_diag_get_exact(skb, nlh);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 30a7763..aae2bd8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2544,7 +2544,8 @@ int __init ip_mr_init(void)
 		goto add_proto_fail;
 	}
 #endif
-	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
+	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
+		      NULL, ipmr_rtm_dumproute, NULL);
 	return 0;
 
 #ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 52b0b95..aa29c62 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3295,7 +3295,7 @@ int __init ip_rt_init(void)
 	xfrm_init();
 	xfrm4_init(ip_rt_max_size);
 #endif
-	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
 
 #ifdef CONFIG_SYSCTL
 	register_pernet_subsys(&sysctl_route_ops);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 498b927..954772b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4692,16 +4692,20 @@ int __init addrconf_init(void)
 	if (err < 0)
 		goto errout_af;
 
-	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
+	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
+			      NULL);
 	if (err < 0)
 		goto errout;
 
 	/* Only the first call to __rtnl_register can fail */
-	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
-	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
-	__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
-	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
-	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
+	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
+			inet6_dump_ifaddr, NULL);
+	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
+			inet6_dump_ifmcaddr, NULL);
+	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
+			inet6_dump_ifacaddr, NULL);
 
 	ipv6_addr_label_rtnl_register();
 
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index c8993e5..2d8ddba 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -592,8 +592,11 @@ out:
 
 void __init ipv6_addr_label_rtnl_register(void)
 {
-	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL);
-	__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL);
-	__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump);
+	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
+			NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
+			NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
+			ip6addrlbl_dump, NULL);
 }
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4076a0b..3030bdf 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1586,7 +1586,8 @@ int __init fib6_init(void)
 	if (ret)
 		goto out_kmem_cache_create;
 
-	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
+	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
+			      NULL);
 	if (ret)
 		goto out_unregister_subsys;
 out:
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 82a8099..705c828 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1354,7 +1354,8 @@ int __init ip6_mr_init(void)
 		goto add_proto_fail;
 	}
 #endif
-	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
+	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
+		      ip6mr_rtm_dumproute, NULL);
 	return 0;
 #ifdef CONFIG_IPV6_PIMSM_V2
 add_proto_fail:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index de2b1de..216ff31 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2925,9 +2925,9 @@ int __init ip6_route_init(void)
 		goto xfrm6_init;
 
 	ret = -ENOBUFS;
-	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
-	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
-	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
+	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
+	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
+	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
 		goto fib6_rules_init;
 
 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 8041bef..333b0be 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1120,7 +1120,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 
 	return netlink_dump_start(ctnl, skb, nlh,
 				  ip_set_dump_start,
-				  ip_set_dump_done);
+				  ip_set_dump_done, 0);
 }
 
 /* Add, del and test */
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 482e90c..7dec88a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -970,7 +970,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP)
 		return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
-					  ctnetlink_done);
+					  ctnetlink_done, 0);
 
 	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
 	if (err < 0)
@@ -1840,7 +1840,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		return netlink_dump_start(ctnl, skb, nlh,
 					  ctnetlink_exp_dump_table,
-					  ctnetlink_exp_done);
+					  ctnetlink_exp_done, 0);
 	}
 
 	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6ef64ad..0b92f7549 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1659,13 +1659,10 @@ static int netlink_dump(struct sock *sk)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct netlink_callback *cb;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	struct nlmsghdr *nlh;
 	int len, err = -ENOBUFS;
-
-	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
-	if (!skb)
-		goto errout;
+	int alloc_size;
 
 	mutex_lock(nlk->cb_mutex);
 
@@ -1675,6 +1672,12 @@ static int netlink_dump(struct sock *sk)
 		goto errout_skb;
 	}
 
+	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
+
+	skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
+	if (!skb)
+		goto errout;
+
 	len = cb->dump(skb, cb);
 
 	if (len > 0) {
@@ -1721,7 +1724,8 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
 		       int (*dump)(struct sk_buff *skb,
 				   struct netlink_callback *),
-		       int (*done)(struct netlink_callback *))
+		       int (*done)(struct netlink_callback *),
+		       u16 min_dump_alloc)
 {
 	struct netlink_callback *cb;
 	struct sock *sk;
@@ -1735,6 +1739,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	cb->dump = dump;
 	cb->done = done;
 	cb->nlh = nlh;
+	cb->min_dump_alloc = min_dump_alloc;
 	atomic_inc(&skb->users);
 	cb->skb = skb;
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 1781d99..482fa57 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -525,7 +525,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		genl_unlock();
 		err = netlink_dump_start(net->genl_sock, skb, nlh,
-					 ops->dumpit, ops->done);
+					 ops->dumpit, ops->done, 0);
 		genl_lock();
 		return err;
 	}
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 438accb..d61f676 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -289,15 +289,16 @@ out:
 
 int __init phonet_netlink_register(void)
 {
-	int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL);
+	int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit,
+				  NULL, NULL);
 	if (err)
 		return err;
 
 	/* Further __rtnl_register() cannot fail */
-	__rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL);
-	__rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit);
-	__rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL);
-	__rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL);
-	__rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit);
+	__rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, NULL);
+	__rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, NULL);
+	__rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, NULL);
+	__rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, NULL);
+	__rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, NULL);
 	return 0;
 }
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a606025..2f64262 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1115,9 +1115,10 @@ nlmsg_failure:
 
 static int __init tc_action_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action);
+	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
+		      NULL);
 
 	return 0;
 }
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bb2c523..9563887 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -610,10 +610,10 @@ EXPORT_SYMBOL(tcf_exts_dump_stats);
 
 static int __init tc_filter_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
-						 tc_dump_tfilter);
+		      tc_dump_tfilter, NULL);
 
 	return 0;
 }
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6b86276..8182aef 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1792,12 +1792,12 @@ static int __init pktsched_init(void)
 	register_qdisc(&pfifo_head_drop_qdisc_ops);
 	register_qdisc(&mq_qdisc_ops);
 
-	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
-	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
+	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);
 
 	return 0;
 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c658cb3..0256b8a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2299,7 +2299,8 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (link->dump == NULL)
 			return -EINVAL;
 
-		return netlink_dump_start(net->xfrm.nlsk, skb, nlh, link->dump, link->done);
+		return netlink_dump_start(net->xfrm.nlsk, skb, nlh,
+					  link->dump, link->done, 0);
 	}
 
 	err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX,
-- 
1.7.5.2


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox