Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next 07/11] ixgbe: move setting RSC into a separate function
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Move setting RSC into a separate function to allow for reuse in other
parts of the code.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |   34 ++++++++++++++++++----------------
 1 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 2965b6e..405c5ba 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -459,6 +459,21 @@ static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data)
 	return 0;
 }
 
+static void ixgbe_set_rsc(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbe_ring *ring = adapter->rx_ring[i];
+		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+			set_ring_rsc_enabled(ring);
+			ixgbe_configure_rscctl(adapter, ring);
+		} else {
+			ixgbe_clear_rscctl(adapter, ring);
+		}
+	}
+}
+
 static u32 ixgbe_get_tx_csum(struct net_device *netdev)
 {
 	return (netdev->features & NETIF_F_IP_CSUM) != 0;
@@ -2281,25 +2296,12 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 		} else {
 			adapter->flags2 ^= IXGBE_FLAG2_RSC_ENABLED;
 			switch (adapter->hw.mac.type) {
+			case ixgbe_mac_X540:
+				ixgbe_set_rsc(adapter);
+				break;
 			case ixgbe_mac_82599EB:
 				need_reset = true;
 				break;
-			case ixgbe_mac_X540: {
-				int i;
-				for (i = 0; i < adapter->num_rx_queues; i++) {
-					struct ixgbe_ring *ring =
-					                  adapter->rx_ring[i];
-					if (adapter->flags2 &
-					    IXGBE_FLAG2_RSC_ENABLED) {
-						ixgbe_configure_rscctl(adapter,
-						                       ring);
-					} else {
-						ixgbe_clear_rscctl(adapter,
-						                   ring);
-					}
-				}
-			}
-				break;
 			default:
 				break;
 			}
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 11/11] ixgbe: update driver version string
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Don Skidmore, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Don Skidmore <donald.c.skidmore@intel.com>

Update the ixgbe driver version string to better match the Source Driver
with similar device support.  Likewise update to the current LAD Linux
versioning scheme.

Signed-of-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_main.c |    8 ++++----
 drivers/net/ixgbe/ixgbe_type.h |    3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 916a2b6..2496a27 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -54,11 +54,10 @@ char ixgbe_driver_name[] = "ixgbe";
 static const char ixgbe_driver_string[] =
 			      "Intel(R) 10 Gigabit PCI Express Network Driver";
 #define MAJ 3
-#define MIN 3
+#define MIN 4
 #define BUILD 8
-#define KFIX 2
 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
-	__stringify(BUILD) "-k" __stringify(KFIX)
+	__stringify(BUILD) "-k"
 const char ixgbe_driver_version[] = DRV_VERSION;
 static const char ixgbe_copyright[] =
 				"Copyright (c) 1999-2011 Intel Corporation.";
@@ -7713,7 +7712,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 
 	/* Inform firmware of driver version */
 	if (hw->mac.ops.set_fw_drv_ver)
-		hw->mac.ops.set_fw_drv_ver(hw, MAJ, MIN, BUILD, KFIX);
+		hw->mac.ops.set_fw_drv_ver(hw, MAJ, MIN, BUILD,
+					   FW_CEM_UNUSED_VER);
 
 	/* add san mac addr to netdev */
 	ixgbe_add_sanmac_netdev(netdev);
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index 8b1abd4..1eefc0c 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -2143,7 +2143,8 @@ enum ixgbe_fdir_pballoc_type {
 #define FW_CEM_HDR_LEN                0x4
 #define FW_CEM_CMD_DRIVER_INFO        0xDD
 #define FW_CEM_CMD_DRIVER_INFO_LEN    0x5
-#define FW_CEM_CMD_RESERVED           0X0
+#define FW_CEM_CMD_RESERVED           0x0
+#define FW_CEM_UNUSED_VER             0x0
 #define FW_CEM_MAX_RETRIES            3
 #define FW_CEM_RESP_STATUS_SUCCESS    0x1
 
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 08/11] ixgbe: move reset code into a separate function
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Move reset code into a separate function to allow for reuse in other
parts of the code.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |   26 ++++++++++++++------------
 1 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 405c5ba..bb8441e 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -442,6 +442,16 @@ static int ixgbe_set_pauseparam(struct net_device *netdev,
 	return 0;
 }
 
+static void ixgbe_do_reset(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		ixgbe_reinit_locked(adapter);
+	else
+		ixgbe_reset(adapter);
+}
+
 static u32 ixgbe_get_rx_csum(struct net_device *netdev)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
@@ -2249,12 +2259,8 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
 	 * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings
 	 * also locks in RSC enable/disable which requires reset
 	 */
-	if (need_reset) {
-		if (netif_running(netdev))
-			ixgbe_reinit_locked(adapter);
-		else
-			ixgbe_reset(adapter);
-	}
+	if (need_reset)
+		ixgbe_do_reset(netdev);
 
 	return 0;
 }
@@ -2328,12 +2334,8 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 		need_reset = true;
 	}
 
-	if (need_reset) {
-		if (netif_running(netdev))
-			ixgbe_reinit_locked(adapter);
-		else
-			ixgbe_reset(adapter);
-	}
+	if (need_reset)
+		ixgbe_do_reset(netdev);
 
 	return 0;
 }
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 10/11] ixgbe: fix ring assignment issues for SR-IOV and drop cases
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change fixes the fact that we would trigger a null pointer dereference
or specify the wrong ring if the rings were restored.  This change makes
certain that the DROP queue is a static value, and all other rings are
based on the ring offsets for the PF.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |    2 ++
 drivers/net/ixgbe/ixgbe_main.c    |    8 +++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 596d794..074e9ba 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2677,6 +2677,8 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 	/* program filters to filter memory */
 	err = ixgbe_fdir_write_perfect_filter_82599(hw,
 				&input->filter, input->sw_idx,
+				(input->action == IXGBE_FDIR_DROP_QUEUE) ?
+				IXGBE_FDIR_DROP_QUEUE :
 				adapter->rx_ring[input->action]->reg_idx);
 	if (err)
 		goto err_out_w_lock;
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 2886dae..916a2b6 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3755,9 +3755,11 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
 	hlist_for_each_entry_safe(filter, node, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		ixgbe_fdir_write_perfect_filter_82599(hw,
-						      &filter->filter,
-						      filter->sw_idx,
-						      filter->action);
+				&filter->filter,
+				filter->sw_idx,
+				(filter->action == IXGBE_FDIR_DROP_QUEUE) ?
+				IXGBE_FDIR_DROP_QUEUE :
+				adapter->rx_ring[filter->action]->reg_idx);
 	}
 
 	spin_unlock(&adapter->fdir_perfect_lock);
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 06/11] ixgbe: add support for nfc addition and removal of filters
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change is meant to allow for nfc to insert and remove filters in order
to test the ethtool interface which includes it's own rules manager.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |  245 +++++++++++++++++++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_main.c    |   45 +++++++
 2 files changed, 290 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 649e596..2965b6e 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2456,6 +2456,250 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 	return ret;
 }
 
+static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					   struct ixgbe_fdir_filter *input,
+					   u16 sw_idx)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct hlist_node *node, *node2, *parent;
+	struct ixgbe_fdir_filter *rule;
+	int err = -EINVAL;
+
+	parent = NULL;
+	rule = NULL;
+
+	hlist_for_each_entry_safe(rule, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		/* hash found, or no matching entry */
+		if (rule->sw_idx >= sw_idx)
+			break;
+		parent = node;
+	}
+
+	/* if there is an old rule occupying our place remove it */
+	if (rule && (rule->sw_idx == sw_idx)) {
+		if (!input || (rule->filter.formatted.bkt_hash !=
+			       input->filter.formatted.bkt_hash)) {
+			err = ixgbe_fdir_erase_perfect_filter_82599(hw,
+								&rule->filter,
+								sw_idx);
+		}
+
+		hlist_del(&rule->fdir_node);
+		kfree(rule);
+		adapter->fdir_filter_count--;
+	}
+
+	/*
+	 * If no input this was a delete, err should be 0 if a rule was
+	 * successfully found and removed from the list else -EINVAL
+	 */
+	if (!input)
+		return err;
+
+	/* initialize node and set software index */
+	INIT_HLIST_NODE(&input->fdir_node);
+
+	/* add filter to the list */
+	if (parent)
+		hlist_add_after(parent, &input->fdir_node);
+	else
+		hlist_add_head(&input->fdir_node,
+			       &adapter->fdir_filter_list);
+
+	/* update counts */
+	adapter->fdir_filter_count++;
+
+	return 0;
+}
+
+static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp,
+				       u8 *flow_type)
+{
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+		break;
+	case UDP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+		break;
+	case SCTP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+		break;
+	case IP_USER_FLOW:
+		switch (fsp->h_u.usr_ip4_spec.proto) {
+		case IPPROTO_TCP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+			break;
+		case IPPROTO_UDP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+			break;
+		case IPPROTO_SCTP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+			break;
+		case 0:
+			if (!fsp->m_u.usr_ip4_spec.proto) {
+				*flow_type = IXGBE_ATR_FLOW_TYPE_IPV4;
+				break;
+			}
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_fdir_filter *input;
+	union ixgbe_atr_input mask;
+	int err;
+
+	if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Don't allow programming if the action is a queue greater than
+	 * the number of online Rx queues.
+	 */
+	if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
+	    (fsp->ring_cookie >= adapter->num_rx_queues))
+		return -EINVAL;
+
+	/* Don't allow indexes to exist outside of available space */
+	if (fsp->location >= ((1024 << adapter->fdir_pballoc) - 2)) {
+		e_err(drv, "Location out of range\n");
+		return -EINVAL;
+	}
+
+	input = kzalloc(sizeof(*input), GFP_ATOMIC);
+	if (!input)
+		return -ENOMEM;
+
+	memset(&mask, 0, sizeof(union ixgbe_atr_input));
+
+	/* set SW index */
+	input->sw_idx = fsp->location;
+
+	/* record flow type */
+	if (!ixgbe_flowspec_to_flow_type(fsp,
+					 &input->filter.formatted.flow_type)) {
+		e_err(drv, "Unrecognized flow type\n");
+		goto err_out;
+	}
+
+	mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
+				   IXGBE_ATR_L4TYPE_MASK;
+
+	if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
+		mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;
+
+	/* Copy input into formatted structures */
+	input->filter.formatted.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+	mask.formatted.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
+	input->filter.formatted.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+	mask.formatted.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
+	input->filter.formatted.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+	mask.formatted.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+	input->filter.formatted.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+	mask.formatted.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+
+	if (fsp->flow_type & FLOW_EXT) {
+		input->filter.formatted.vm_pool =
+				(unsigned char)ntohl(fsp->h_ext.data[1]);
+		mask.formatted.vm_pool =
+				(unsigned char)ntohl(fsp->m_ext.data[1]);
+		input->filter.formatted.vlan_id = fsp->h_ext.vlan_tci;
+		mask.formatted.vlan_id = fsp->m_ext.vlan_tci;
+		input->filter.formatted.flex_bytes =
+						fsp->h_ext.vlan_etype;
+		mask.formatted.flex_bytes = fsp->m_ext.vlan_etype;
+	}
+
+	/* determine if we need to drop or route the packet */
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
+		input->action = IXGBE_FDIR_DROP_QUEUE;
+	else
+		input->action = fsp->ring_cookie;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	if (hlist_empty(&adapter->fdir_filter_list)) {
+		/* save mask and program input mask into HW */
+		memcpy(&adapter->fdir_mask, &mask, sizeof(mask));
+		err = ixgbe_fdir_set_input_mask_82599(hw, &mask);
+		if (err) {
+			e_err(drv, "Error writing mask\n");
+			goto err_out_w_lock;
+		}
+	} else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) {
+		e_err(drv, "Only one mask supported per port\n");
+		goto err_out_w_lock;
+	}
+
+	/* apply mask and compute/store hash */
+	ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
+
+	/* program filters to filter memory */
+	err = ixgbe_fdir_write_perfect_filter_82599(hw,
+				&input->filter, input->sw_idx,
+				adapter->rx_ring[input->action]->reg_idx);
+	if (err)
+		goto err_out_w_lock;
+
+	ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+
+	return err;
+err_out_w_lock:
+	spin_unlock(&adapter->fdir_perfect_lock);
+err_out:
+	kfree(input);
+	return -EINVAL;
+}
+
+static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	int err;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+	err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, fsp->location);
+	spin_unlock(&adapter->fdir_perfect_lock);
+
+	return err;
+}
+
+static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		ret = ixgbe_add_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_settings           = ixgbe_get_settings,
 	.set_settings           = ixgbe_set_settings,
@@ -2492,6 +2736,7 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_flags              = ethtool_op_get_flags,
 	.set_flags              = ixgbe_set_flags,
 	.get_rxnfc		= ixgbe_get_rxnfc,
+	.set_rxnfc		= ixgbe_set_rxnfc,
 };
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index e177b5d..2886dae 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3741,6 +3741,28 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter)
 	hw->mac.ops.set_rxpba(&adapter->hw, num_tc, hdrm, PBA_STRATEGY_EQUAL);
 }
 
+static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *filter;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	if (!hlist_empty(&adapter->fdir_filter_list))
+		ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask);
+
+	hlist_for_each_entry_safe(filter, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		ixgbe_fdir_write_perfect_filter_82599(hw,
+						      &filter->filter,
+						      filter->sw_idx,
+						      filter->action);
+	}
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+}
+
 static void ixgbe_configure(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
@@ -3765,6 +3787,10 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 			adapter->tx_ring[i]->atr_sample_rate =
 						       adapter->atr_sample_rate;
 		ixgbe_init_fdir_signature_82599(hw, adapter->fdir_pballoc);
+	} else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
+		ixgbe_init_fdir_perfect_82599(&adapter->hw,
+					      adapter->fdir_pballoc);
+		ixgbe_fdir_filter_restore(adapter);
 	}
 	ixgbe_configure_virtualization(adapter);
 
@@ -4141,6 +4167,23 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter)
 		ixgbe_clean_tx_ring(adapter->tx_ring[i]);
 }
 
+static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter)
+{
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *filter;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	hlist_for_each_entry_safe(filter, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		hlist_del(&filter->fdir_node);
+		kfree(filter);
+	}
+	adapter->fdir_filter_count = 0;
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+}
+
 void ixgbe_down(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
@@ -5527,6 +5570,8 @@ static int ixgbe_close(struct net_device *netdev)
 	ixgbe_down(adapter);
 	ixgbe_free_irq(adapter);
 
+	ixgbe_fdir_filter_exit(adapter);
+
 	ixgbe_free_all_tx_resources(adapter);
 	ixgbe_free_all_rx_resources(adapter);
 
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 00/11][pull request] Intel Wired LAN Driver Update
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo

The following series contains updates to ixgbe.  There are 2 fixes, along
with several cleanups from Alex and Emil.  In addition, nfc support was added
as well as bumping the driver version.

Dropped patch 7 (ixgbe: add support for modifying UDP RSS flow hash options)
from the previous submission based on comments from Ben H.

The following are changes since commit 4d054f2f1445aceedab3f9642692d55d2caa7ec6:
  dcb: use nlmsg_free() instead of kfree()
and are available in the git repository at:
  master.kernel.org:/pub/scm/linux/kernel/git/jkirsher/net-next-2.6 master

Alexander Duyck (7):
  ixgbe: remove ntuple filtering
  ixgbe: fix flags relating to perfect filters to support coexistence
  ixgbe: update perfect filter framework to support retaining filters
  ixgbe: add basic support for setting and getting nfc controls
  ixgbe: add support for displaying ntuple filters via the nfc
    interface
  ixgbe: add support for nfc addition and removal of filters
  ixgbe: fix ring assignment issues for SR-IOV and drop cases

Don Skidmore (1):
  ixgbe: update driver version string

Emil Tantilov (3):
  ixgbe: move setting RSC into a separate function
  ixgbe: move reset code into a separate function
  ixgbe: disable RSC when Rx checksum is off

 drivers/net/ixgbe/ixgbe.h         |   29 ++-
 drivers/net/ixgbe/ixgbe_82599.c   |  603 +++++++++++++++++++++----------------
 drivers/net/ixgbe/ixgbe_dcb_nl.c  |   13 +-
 drivers/net/ixgbe/ixgbe_ethtool.c |  544 ++++++++++++++++++++++++---------
 drivers/net/ixgbe/ixgbe_main.c    |   89 ++++--
 drivers/net/ixgbe/ixgbe_type.h    |   28 +-
 6 files changed, 850 insertions(+), 456 deletions(-)

-- 
1.7.5.4


^ permalink raw reply

* [net-next 09/11] ixgbe: disable RSC when Rx checksum is off
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

Disabling Rx checksumming leads to performance degradation due to
RSC causing packets to have incorrect checksums.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |   44 +++++++++++++++++++++++++++---------
 1 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index bb8441e..596d794 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -458,17 +458,6 @@ static u32 ixgbe_get_rx_csum(struct net_device *netdev)
 	return adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED;
 }
 
-static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data)
-{
-	struct ixgbe_adapter *adapter = netdev_priv(netdev);
-	if (data)
-		adapter->flags |= IXGBE_FLAG_RX_CSUM_ENABLED;
-	else
-		adapter->flags &= ~IXGBE_FLAG_RX_CSUM_ENABLED;
-
-	return 0;
-}
-
 static void ixgbe_set_rsc(struct ixgbe_adapter *adapter)
 {
 	int i;
@@ -484,6 +473,39 @@ static void ixgbe_set_rsc(struct ixgbe_adapter *adapter)
 	}
 }
 
+static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	bool need_reset = false;
+
+	if (data) {
+		adapter->flags |= IXGBE_FLAG_RX_CSUM_ENABLED;
+	} else {
+		adapter->flags &= ~IXGBE_FLAG_RX_CSUM_ENABLED;
+
+		if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) {
+			adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+			netdev->features &= ~NETIF_F_LRO;
+		}
+
+		switch (adapter->hw.mac.type) {
+		case ixgbe_mac_X540:
+			ixgbe_set_rsc(adapter);
+			break;
+		case ixgbe_mac_82599EB:
+			need_reset = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (need_reset)
+		ixgbe_do_reset(netdev);
+
+	return 0;
+}
+
 static u32 ixgbe_get_tx_csum(struct net_device *netdev)
 {
 	return (netdev->features & NETIF_F_IP_CSUM) != 0;
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 05/11] ixgbe: add support for displaying ntuple filters via the nfc interface
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This code adds support for displaying the filters that were added via the
nfc interface.  This is primarily to test the interface for now, but I am
also looking into the feasibility of moving all of the ntuple filter code
in ixgbe over to the nfc interface since it seems to be better implemented.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe.h         |   11 ++++
 drivers/net/ixgbe/ixgbe_ethtool.c |  102 +++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 5ea5b4c..d6bfb2f 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -482,6 +482,17 @@ struct ixgbe_adapter {
 	struct vf_macvlans vf_mvs;
 	struct vf_macvlans *mv_list;
 	bool antispoofing_enabled;
+
+	struct hlist_head fdir_filter_list;
+	union ixgbe_atr_input fdir_mask;
+	int fdir_filter_count;
+};
+
+struct ixgbe_fdir_filter {
+	struct hlist_node fdir_node;
+	union ixgbe_atr_input filter;
+	u16 sw_idx;
+	u16 action;
 };
 
 enum ixbge_state_t {
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 8373244..649e596 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2336,6 +2336,97 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	return 0;
 }
 
+static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	union ixgbe_atr_input *mask = &adapter->fdir_mask;
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *rule = NULL;
+
+	/* report total rule count */
+	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+	hlist_for_each_entry_safe(rule, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		if (fsp->location <= rule->sw_idx)
+			break;
+	}
+
+	if (!rule || fsp->location != rule->sw_idx)
+		return -EINVAL;
+
+	/* fill out the flow spec entry */
+
+	/* set flow type field */
+	switch (rule->filter.formatted.flow_type) {
+	case IXGBE_ATR_FLOW_TYPE_TCPV4:
+		fsp->flow_type = TCP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_UDPV4:
+		fsp->flow_type = UDP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+		fsp->flow_type = SCTP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_IPV4:
+		fsp->flow_type = IP_USER_FLOW;
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = 0;
+		fsp->m_u.usr_ip4_spec.proto = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	fsp->h_u.tcp_ip4_spec.psrc = rule->filter.formatted.src_port;
+	fsp->m_u.tcp_ip4_spec.psrc = mask->formatted.src_port;
+	fsp->h_u.tcp_ip4_spec.pdst = rule->filter.formatted.dst_port;
+	fsp->m_u.tcp_ip4_spec.pdst = mask->formatted.dst_port;
+	fsp->h_u.tcp_ip4_spec.ip4src = rule->filter.formatted.src_ip[0];
+	fsp->m_u.tcp_ip4_spec.ip4src = mask->formatted.src_ip[0];
+	fsp->h_u.tcp_ip4_spec.ip4dst = rule->filter.formatted.dst_ip[0];
+	fsp->m_u.tcp_ip4_spec.ip4dst = mask->formatted.dst_ip[0];
+	fsp->h_ext.vlan_tci = rule->filter.formatted.vlan_id;
+	fsp->m_ext.vlan_tci = mask->formatted.vlan_id;
+	fsp->h_ext.vlan_etype = rule->filter.formatted.flex_bytes;
+	fsp->m_ext.vlan_etype = mask->formatted.flex_bytes;
+	fsp->h_ext.data[1] = htonl(rule->filter.formatted.vm_pool);
+	fsp->m_ext.data[1] = htonl(mask->formatted.vm_pool);
+	fsp->flow_type |= FLOW_EXT;
+
+	/* record action */
+	if (rule->action == IXGBE_FDIR_DROP_QUEUE)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->action;
+
+	return 0;
+}
+
+static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter,
+				      struct ethtool_rxnfc *cmd,
+				      u32 *rule_locs)
+{
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *rule;
+	int cnt = 0;
+
+	/* report total rule count */
+	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+	hlist_for_each_entry_safe(rule, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+		rule_locs[cnt] = rule->sw_idx;
+		cnt++;
+	}
+
+	return 0;
+}
+
 static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 			   void *rule_locs)
 {
@@ -2347,6 +2438,17 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 		cmd->data = adapter->num_rx_queues;
 		ret = 0;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = adapter->fdir_filter_count;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = ixgbe_get_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = ixgbe_get_ethtool_fdir_all(adapter, cmd,
+						 (u32 *)rule_locs);
+		break;
 	default:
 		break;
 	}
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 01/11] ixgbe: remove ntuple filtering
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Due to numerous issues in ntuple filters it has been decided to move the
interface over to the network flow classification interface.  As a first
step to achieving this I first need to remove the old ntuple interface.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |  136 -------------------------------------
 1 files changed, 0 insertions(+), 136 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 4950d03..e41dd24 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2336,141 +2336,6 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	return 0;
 }
 
-static int ixgbe_set_rx_ntuple(struct net_device *dev,
-                               struct ethtool_rx_ntuple *cmd)
-{
-	struct ixgbe_adapter *adapter = netdev_priv(dev);
-	struct ethtool_rx_ntuple_flow_spec *fs = &cmd->fs;
-	union ixgbe_atr_input input_struct;
-	struct ixgbe_atr_input_masks input_masks;
-	int target_queue;
-	int err;
-
-	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
-		return -EOPNOTSUPP;
-
-	/*
-	 * Don't allow programming if the action is a queue greater than
-	 * the number of online Tx queues.
-	 */
-	if ((fs->action >= adapter->num_tx_queues) ||
-	    (fs->action < ETHTOOL_RXNTUPLE_ACTION_DROP))
-		return -EINVAL;
-
-	memset(&input_struct, 0, sizeof(union ixgbe_atr_input));
-	memset(&input_masks, 0, sizeof(struct ixgbe_atr_input_masks));
-
-	/* record flow type */
-	switch (fs->flow_type) {
-	case IPV4_FLOW:
-		input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_IPV4;
-		break;
-	case TCP_V4_FLOW:
-		input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
-		break;
-	case UDP_V4_FLOW:
-		input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
-		break;
-	case SCTP_V4_FLOW:
-		input_struct.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
-		break;
-	default:
-		return -1;
-	}
-
-	/* copy vlan tag minus the CFI bit */
-	if ((fs->vlan_tag & 0xEFFF) || (~fs->vlan_tag_mask & 0xEFFF)) {
-		input_struct.formatted.vlan_id = htons(fs->vlan_tag & 0xEFFF);
-		if (!fs->vlan_tag_mask) {
-			input_masks.vlan_id_mask = htons(0xEFFF);
-		} else {
-			switch (~fs->vlan_tag_mask & 0xEFFF) {
-			/* all of these are valid vlan-mask values */
-			case 0xEFFF:
-			case 0xE000:
-			case 0x0FFF:
-			case 0x0000:
-				input_masks.vlan_id_mask =
-					htons(~fs->vlan_tag_mask);
-				break;
-			/* exit with error if vlan-mask is invalid */
-			default:
-				e_err(drv, "Partial VLAN ID or "
-				      "priority mask in vlan-mask is not "
-				      "supported by hardware\n");
-				return -1;
-			}
-		}
-	}
-
-	/* make sure we only use the first 2 bytes of user data */
-	if ((fs->data & 0xFFFF) || (~fs->data_mask & 0xFFFF)) {
-		input_struct.formatted.flex_bytes = htons(fs->data & 0xFFFF);
-		if (!(fs->data_mask & 0xFFFF)) {
-			input_masks.flex_mask = 0xFFFF;
-		} else if (~fs->data_mask & 0xFFFF) {
-			e_err(drv, "Partial user-def-mask is not "
-			      "supported by hardware\n");
-			return -1;
-		}
-	}
-
-	/*
-	 * Copy input into formatted structures
-	 *
-	 * These assignments are based on the following logic
-	 * If neither input or mask are set assume value is masked out.
-	 * If input is set, but mask is not mask should default to accept all.
-	 * If input is not set, but mask is set then mask likely results in 0.
-	 * If input is set and mask is set then assign both.
-	 */
-	if (fs->h_u.tcp_ip4_spec.ip4src || ~fs->m_u.tcp_ip4_spec.ip4src) {
-		input_struct.formatted.src_ip[0] = fs->h_u.tcp_ip4_spec.ip4src;
-		if (!fs->m_u.tcp_ip4_spec.ip4src)
-			input_masks.src_ip_mask[0] = 0xFFFFFFFF;
-		else
-			input_masks.src_ip_mask[0] =
-				~fs->m_u.tcp_ip4_spec.ip4src;
-	}
-	if (fs->h_u.tcp_ip4_spec.ip4dst || ~fs->m_u.tcp_ip4_spec.ip4dst) {
-		input_struct.formatted.dst_ip[0] = fs->h_u.tcp_ip4_spec.ip4dst;
-		if (!fs->m_u.tcp_ip4_spec.ip4dst)
-			input_masks.dst_ip_mask[0] = 0xFFFFFFFF;
-		else
-			input_masks.dst_ip_mask[0] =
-				~fs->m_u.tcp_ip4_spec.ip4dst;
-	}
-	if (fs->h_u.tcp_ip4_spec.psrc || ~fs->m_u.tcp_ip4_spec.psrc) {
-		input_struct.formatted.src_port = fs->h_u.tcp_ip4_spec.psrc;
-		if (!fs->m_u.tcp_ip4_spec.psrc)
-			input_masks.src_port_mask = 0xFFFF;
-		else
-			input_masks.src_port_mask = ~fs->m_u.tcp_ip4_spec.psrc;
-	}
-	if (fs->h_u.tcp_ip4_spec.pdst || ~fs->m_u.tcp_ip4_spec.pdst) {
-		input_struct.formatted.dst_port = fs->h_u.tcp_ip4_spec.pdst;
-		if (!fs->m_u.tcp_ip4_spec.pdst)
-			input_masks.dst_port_mask = 0xFFFF;
-		else
-			input_masks.dst_port_mask = ~fs->m_u.tcp_ip4_spec.pdst;
-	}
-
-	/* determine if we need to drop or route the packet */
-	if (fs->action == ETHTOOL_RXNTUPLE_ACTION_DROP)
-		target_queue = MAX_RX_QUEUES - 1;
-	else
-		target_queue = fs->action;
-
-	spin_lock(&adapter->fdir_perfect_lock);
-	err = ixgbe_fdir_add_perfect_filter_82599(&adapter->hw,
-						  &input_struct,
-						  &input_masks, 0,
-						  target_queue);
-	spin_unlock(&adapter->fdir_perfect_lock);
-
-	return err ? -1 : 0;
-}
-
 static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_settings           = ixgbe_get_settings,
 	.set_settings           = ixgbe_set_settings,
@@ -2506,7 +2371,6 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.set_coalesce           = ixgbe_set_coalesce,
 	.get_flags              = ethtool_op_get_flags,
 	.set_flags              = ixgbe_set_flags,
-	.set_rx_ntuple          = ixgbe_set_rx_ntuple,
 };
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 03/11] ixgbe: update perfect filter framework to support retaining filters
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change is meant to update the internal framework of ixgbe so that
perfect filters can be stored and tracked via software.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe.h       |   18 +-
 drivers/net/ixgbe/ixgbe_82599.c |  603 ++++++++++++++++++++++-----------------
 drivers/net/ixgbe/ixgbe_main.c  |    2 +-
 drivers/net/ixgbe/ixgbe_type.h  |   25 +-
 4 files changed, 368 insertions(+), 280 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index d5674fc..5ea5b4c 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -543,16 +543,22 @@ extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16);
 extern void ixgbe_write_eitr(struct ixgbe_q_vector *);
 extern int ethtool_ioctl(struct ifreq *ifr);
 extern s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
-extern s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc);
-extern s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc);
+extern s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+extern s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl);
 extern s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
 						 union ixgbe_atr_hash_dword input,
 						 union ixgbe_atr_hash_dword common,
                                                  u8 queue);
-extern s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
-                                      union ixgbe_atr_input *input,
-                                      struct ixgbe_atr_input_masks *input_masks,
-                                      u16 soft_id, u8 queue);
+extern s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+					   union ixgbe_atr_input *input_mask);
+extern s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+						 union ixgbe_atr_input *input,
+						 u16 soft_id, u8 queue);
+extern s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+						 union ixgbe_atr_input *input,
+						 u16 soft_id);
+extern void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+						 union ixgbe_atr_input *mask);
 extern void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
                                    struct ixgbe_ring *ring);
 extern void ixgbe_clear_rscctl(struct ixgbe_adapter *adapter,
diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index 4a6826b..3b3dd4d 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -1107,115 +1107,87 @@ s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
 }
 
 /**
- *  ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters
+ *  ixgbe_set_fdir_rxpba_82599 - Initialize Flow Director Rx packet buffer
  *  @hw: pointer to hardware structure
  *  @pballoc: which mode to allocate filters with
  **/
-s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc)
+static s32 ixgbe_set_fdir_rxpba_82599(struct ixgbe_hw *hw, const u32 pballoc)
 {
-	u32 fdirctrl = 0;
+	u32 fdir_pbsize = hw->mac.rx_pb_size << IXGBE_RXPBSIZE_SHIFT;
+	u32 current_rxpbsize = 0;
 	int i;
 
-	/* Send interrupt when 64 filters are left */
-	fdirctrl |= 4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT;
-
-	/* Set the maximum length per hash bucket to 0xA filters */
-	fdirctrl |= 0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT;
-
+	/* reserve space for Flow Director filters */
 	switch (pballoc) {
-	case IXGBE_FDIR_PBALLOC_64K:
-		/* 8k - 1 signature filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_64K;
+	case IXGBE_FDIR_PBALLOC_256K:
+		fdir_pbsize -= 256 << IXGBE_RXPBSIZE_SHIFT;
 		break;
 	case IXGBE_FDIR_PBALLOC_128K:
-		/* 16k - 1 signature filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_128K;
+		fdir_pbsize -= 128 << IXGBE_RXPBSIZE_SHIFT;
 		break;
-	case IXGBE_FDIR_PBALLOC_256K:
-		/* 32k - 1 signature filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_256K;
+	case IXGBE_FDIR_PBALLOC_64K:
+		fdir_pbsize -= 64 << IXGBE_RXPBSIZE_SHIFT;
 		break;
+	case IXGBE_FDIR_PBALLOC_NONE:
 	default:
-		/* bad value */
-		return IXGBE_ERR_CONFIG;
+		return IXGBE_ERR_PARAM;
 	}
 
-	/* Move the flexible bytes to use the ethertype - shift 6 words */
-	fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT);
+	/* determine current RX packet buffer size */
+	for (i = 0; i < 8; i++)
+		current_rxpbsize += IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
 
+	/* if there is already room for the filters do nothing */
+	if (current_rxpbsize <= fdir_pbsize)
+		return 0;
 
-	/* Prime the keys for hashing */
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
-
-	/*
-	 * Poll init-done after we write the register.  Estimated times:
-	 *      10G: PBALLOC = 11b, timing is 60us
-	 *       1G: PBALLOC = 11b, timing is 600us
-	 *     100M: PBALLOC = 11b, timing is 6ms
-	 *
-	 *     Multiple these timings by 4 if under full Rx load
-	 *
-	 * So we'll poll for IXGBE_FDIR_INIT_DONE_POLL times, sleeping for
-	 * 1 msec per poll time.  If we're at line rate and drop to 100M, then
-	 * this might not finish in our poll time, but we can live with that
-	 * for now.
-	 */
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
-	IXGBE_WRITE_FLUSH(hw);
-	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
-		if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
-		                   IXGBE_FDIRCTRL_INIT_DONE)
-			break;
-		usleep_range(1000, 2000);
+	if (current_rxpbsize > hw->mac.rx_pb_size) {
+		/*
+		 * if rxpbsize is greater than max then HW max the Rx buffer
+		 * sizes are unconfigured or misconfigured since HW default is
+		 * to give the full buffer to each traffic class resulting in
+		 * the total size being buffer size 8x actual size
+		 *
+		 * This assumes no DCB since the RXPBSIZE registers appear to
+		 * be unconfigured.
+		 */
+		IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), fdir_pbsize);
+		for (i = 1; i < 8; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
+	} else {
+		/*
+		 * Since the Rx packet buffer appears to have already been
+		 * configured we need to shrink each packet buffer by enough
+		 * to make room for the filters.  As such we take each rxpbsize
+		 * value and multiply it by a fraction representing the size
+		 * needed over the size we currently have.
+		 *
+		 * We need to reduce fdir_pbsize and current_rxpbsize to
+		 * 1/1024 of their original values in order to avoid
+		 * overflowing the u32 being used to store rxpbsize.
+		 */
+		fdir_pbsize >>= IXGBE_RXPBSIZE_SHIFT;
+		current_rxpbsize >>= IXGBE_RXPBSIZE_SHIFT;
+		for (i = 0; i < 8; i++) {
+			u32 rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
+			rxpbsize *= fdir_pbsize;
+			rxpbsize /= current_rxpbsize;
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
+		}
 	}
-	if (i >= IXGBE_FDIR_INIT_DONE_POLL)
-		hw_dbg(hw, "Flow Director Signature poll time exceeded!\n");
 
 	return 0;
 }
 
 /**
- *  ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters
+ *  ixgbe_fdir_enable_82599 - Initialize Flow Director control registers
  *  @hw: pointer to hardware structure
- *  @pballoc: which mode to allocate filters with
+ *  @fdirctrl: value to write to flow director control register
  **/
-s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
+static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl)
 {
-	u32 fdirctrl = 0;
 	int i;
 
-	/* Send interrupt when 64 filters are left */
-	fdirctrl |= 4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT;
-
-	/* Initialize the drop queue to Rx queue 127 */
-	fdirctrl |= (127 << IXGBE_FDIRCTRL_DROP_Q_SHIFT);
-
-	switch (pballoc) {
-	case IXGBE_FDIR_PBALLOC_64K:
-		/* 2k - 1 perfect filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_64K;
-		break;
-	case IXGBE_FDIR_PBALLOC_128K:
-		/* 4k - 1 perfect filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_128K;
-		break;
-	case IXGBE_FDIR_PBALLOC_256K:
-		/* 8k - 1 perfect filters */
-		fdirctrl |= IXGBE_FDIRCTRL_PBALLOC_256K;
-		break;
-	default:
-		/* bad value */
-		return IXGBE_ERR_CONFIG;
-	}
-
-	/* Turn perfect match filtering on */
-	fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH;
-	fdirctrl |= IXGBE_FDIRCTRL_REPORT_STATUS;
-
-	/* Move the flexible bytes to use the ethertype - shift 6 words */
-	fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT);
-
 	/* Prime the keys for hashing */
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
@@ -1233,10 +1205,6 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
 	 * this might not finish in our poll time, but we can live with that
 	 * for now.
 	 */
-
-	/* Set the maximum length per hash bucket to 0xA filters */
-	fdirctrl |= (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT);
-
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
 	IXGBE_WRITE_FLUSH(hw);
 	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
@@ -1245,101 +1213,77 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc)
 			break;
 		usleep_range(1000, 2000);
 	}
-	if (i >= IXGBE_FDIR_INIT_DONE_POLL)
-		hw_dbg(hw, "Flow Director Perfect poll time exceeded!\n");
 
-	return 0;
+	if (i >= IXGBE_FDIR_INIT_DONE_POLL)
+		hw_dbg(hw, "Flow Director poll time exceeded!\n");
 }
 
-
 /**
- *  ixgbe_atr_compute_hash_82599 - Compute the hashes for SW ATR
- *  @stream: input bitstream to compute the hash on
- *  @key: 32-bit hash key
+ *  ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register, initially
+ *             contains just the value of the Rx packet buffer allocation
  **/
-static u32 ixgbe_atr_compute_hash_82599(union ixgbe_atr_input *atr_input,
-					u32 key)
+s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl)
 {
-	/*
-	 * The algorithm is as follows:
-	 *    Hash[15:0] = Sum { S[n] x K[n+16] }, n = 0...350
-	 *    where Sum {A[n]}, n = 0...n is bitwise XOR of A[0], A[1]...A[n]
-	 *    and A[n] x B[n] is bitwise AND between same length strings
-	 *
-	 *    K[n] is 16 bits, defined as:
-	 *       for n modulo 32 >= 15, K[n] = K[n % 32 : (n % 32) - 15]
-	 *       for n modulo 32 < 15, K[n] =
-	 *             K[(n % 32:0) | (31:31 - (14 - (n % 32)))]
-	 *
-	 *    S[n] is 16 bits, defined as:
-	 *       for n >= 15, S[n] = S[n:n - 15]
-	 *       for n < 15, S[n] = S[(n:0) | (350:350 - (14 - n))]
-	 *
-	 *    To simplify for programming, the algorithm is implemented
-	 *    in software this way:
-	 *
-	 *    key[31:0], hi_hash_dword[31:0], lo_hash_dword[31:0], hash[15:0]
-	 *
-	 *    for (i = 0; i < 352; i+=32)
-	 *        hi_hash_dword[31:0] ^= Stream[(i+31):i];
-	 *
-	 *    lo_hash_dword[15:0]  ^= Stream[15:0];
-	 *    lo_hash_dword[15:0]  ^= hi_hash_dword[31:16];
-	 *    lo_hash_dword[31:16] ^= hi_hash_dword[15:0];
-	 *
-	 *    hi_hash_dword[31:0]  ^= Stream[351:320];
-	 *
-	 *    if(key[0])
-	 *        hash[15:0] ^= Stream[15:0];
-	 *
-	 *    for (i = 0; i < 16; i++) {
-	 *        if (key[i])
-	 *            hash[15:0] ^= lo_hash_dword[(i+15):i];
-	 *        if (key[i + 16])
-	 *            hash[15:0] ^= hi_hash_dword[(i+15):i];
-	 *    }
-	 *
-	 */
-	__be32 common_hash_dword = 0;
-	u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
-	u32 hash_result = 0;
-	u8 i;
+	s32 err;
 
-	/* record the flow_vm_vlan bits as they are a key part to the hash */
-	flow_vm_vlan = ntohl(atr_input->dword_stream[0]);
+	/* Before enabling Flow Director, verify the Rx Packet Buffer size */
+	err = ixgbe_set_fdir_rxpba_82599(hw, fdirctrl);
+	if (err)
+		return err;
 
-	/* generate common hash dword */
-	for (i = 10; i; i -= 2)
-		common_hash_dword ^= atr_input->dword_stream[i] ^
-				     atr_input->dword_stream[i - 1];
+	/*
+	 * Continue setup of fdirctrl register bits:
+	 *  Move the flexible bytes to use the ethertype - shift 6 words
+	 *  Set the maximum length per hash bucket to 0xA filters
+	 *  Send interrupt when 64 filters are left
+	 */
+	fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+		    (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+		    (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
 
-	hi_hash_dword = ntohl(common_hash_dword);
+	/* write hashes and fdirctrl register, poll for completion */
+	ixgbe_fdir_enable_82599(hw, fdirctrl);
 
-	/* low dword is word swapped version of common */
-	lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+	return 0;
+}
 
-	/* apply flow ID/VM pool/VLAN ID bits to hash words */
-	hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+/**
+ *  ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register, initially
+ *             contains just the value of the Rx packet buffer allocation
+ **/
+s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+	s32 err;
 
-	/* Process bits 0 and 16 */
-	if (key & 0x0001) hash_result ^= lo_hash_dword;
-	if (key & 0x00010000) hash_result ^= hi_hash_dword;
+	/* Before enabling Flow Director, verify the Rx Packet Buffer size */
+	err = ixgbe_set_fdir_rxpba_82599(hw, fdirctrl);
+	if (err)
+		return err;
 
 	/*
-	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
-	 * delay this because bit 0 of the stream should not be processed
-	 * so we do not add the vlan until after bit 0 was processed
+	 * Continue setup of fdirctrl register bits:
+	 *  Turn perfect match filtering on
+	 *  Report hash in RSS field of Rx wb descriptor
+	 *  Initialize the drop queue
+	 *  Move the flexible bytes to use the ethertype - shift 6 words
+	 *  Set the maximum length per hash bucket to 0xA filters
+	 *  Send interrupt when 64 (0x4 * 16) filters are left
 	 */
-	lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+	fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH |
+		    IXGBE_FDIRCTRL_REPORT_STATUS |
+		    (IXGBE_FDIR_DROP_QUEUE << IXGBE_FDIRCTRL_DROP_Q_SHIFT) |
+		    (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+		    (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+		    (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
 
+	/* write hashes and fdirctrl register, poll for completion */
+	ixgbe_fdir_enable_82599(hw, fdirctrl);
 
-	/* process the remaining 30 bits in the key 2 bits at a time */
-	for (i = 15; i; i-- ) {
-		if (key & (0x0001 << i)) hash_result ^= lo_hash_dword >> i;
-		if (key & (0x00010000 << i)) hash_result ^= hi_hash_dword >> i;
-	}
-
-	return hash_result & IXGBE_ATR_HASH_MASK;
+	return 0;
 }
 
 /*
@@ -1476,7 +1420,6 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
 	 */
 	fdirhashcmd = (u64)fdircmd << 32;
 	fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common);
-
 	IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd);
 
 	hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd);
@@ -1484,6 +1427,101 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
 	return 0;
 }
 
+#define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \
+do { \
+	u32 n = (_n); \
+	if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
+		bucket_hash ^= lo_hash_dword >> n; \
+	if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
+		bucket_hash ^= hi_hash_dword >> n; \
+} while (0);
+
+/**
+ *  ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash
+ *  @atr_input: input bitstream to compute the hash on
+ *  @input_mask: mask for the input bitstream
+ *
+ *  This function serves two main purposes.  First it applys the input_mask
+ *  to the atr_input resulting in a cleaned up atr_input data stream.
+ *  Secondly it computes the hash and stores it in the bkt_hash field at
+ *  the end of the input byte stream.  This way it will be available for
+ *  future use without needing to recompute the hash.
+ **/
+void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+					  union ixgbe_atr_input *input_mask)
+{
+
+	u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+	u32 bucket_hash = 0;
+
+	/* Apply masks to input data */
+	input->dword_stream[0]  &= input_mask->dword_stream[0];
+	input->dword_stream[1]  &= input_mask->dword_stream[1];
+	input->dword_stream[2]  &= input_mask->dword_stream[2];
+	input->dword_stream[3]  &= input_mask->dword_stream[3];
+	input->dword_stream[4]  &= input_mask->dword_stream[4];
+	input->dword_stream[5]  &= input_mask->dword_stream[5];
+	input->dword_stream[6]  &= input_mask->dword_stream[6];
+	input->dword_stream[7]  &= input_mask->dword_stream[7];
+	input->dword_stream[8]  &= input_mask->dword_stream[8];
+	input->dword_stream[9]  &= input_mask->dword_stream[9];
+	input->dword_stream[10] &= input_mask->dword_stream[10];
+
+	/* record the flow_vm_vlan bits as they are a key part to the hash */
+	flow_vm_vlan = ntohl(input->dword_stream[0]);
+
+	/* generate common hash dword */
+	hi_hash_dword = ntohl(input->dword_stream[1] ^
+				    input->dword_stream[2] ^
+				    input->dword_stream[3] ^
+				    input->dword_stream[4] ^
+				    input->dword_stream[5] ^
+				    input->dword_stream[6] ^
+				    input->dword_stream[7] ^
+				    input->dword_stream[8] ^
+				    input->dword_stream[9] ^
+				    input->dword_stream[10]);
+
+	/* low dword is word swapped version of common */
+	lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+
+	/* apply flow ID/VM pool/VLAN ID bits to hash words */
+	hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+
+	/* Process bits 0 and 16 */
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(0);
+
+	/*
+	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+	 * delay this because bit 0 of the stream should not be processed
+	 * so we do not add the vlan until after bit 0 was processed
+	 */
+	lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+
+	/* Process remaining 30 bit of the key */
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(1);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(2);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(3);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(4);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(5);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(6);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(7);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(8);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(9);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(10);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(11);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(12);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(13);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(14);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(15);
+
+	/*
+	 * Limit hash to 13 bits since max bucket count is 8K.
+	 * Store result at the end of the input stream.
+	 */
+	input->formatted.bkt_hash = bucket_hash & 0x1FFF;
+}
+
 /**
  *  ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks
  *  @input_mask: mask to be bit swapped
@@ -1493,11 +1531,11 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
  *  generate a correctly swapped value we need to bit swap the mask and that
  *  is what is accomplished by this function.
  **/
-static u32 ixgbe_get_fdirtcpm_82599(struct ixgbe_atr_input_masks *input_masks)
+static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask)
 {
-	u32 mask = ntohs(input_masks->dst_port_mask);
+	u32 mask = ntohs(input_mask->formatted.dst_port);
 	mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT;
-	mask |= ntohs(input_masks->src_port_mask);
+	mask |= ntohs(input_mask->formatted.src_port);
 	mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1);
 	mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2);
 	mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4);
@@ -1519,52 +1557,14 @@ static u32 ixgbe_get_fdirtcpm_82599(struct ixgbe_atr_input_masks *input_masks)
 	IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(ntohl(value)))
 
 #define IXGBE_STORE_AS_BE16(_value) \
-	(((u16)(_value) >> 8) | ((u16)(_value) << 8))
+	ntohs(((u16)(_value) >> 8) | ((u16)(_value) << 8))
 
-/**
- *  ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter
- *  @hw: pointer to hardware structure
- *  @input: input bitstream
- *  @input_masks: bitwise masks for relevant fields
- *  @soft_id: software index into the silicon hash tables for filter storage
- *  @queue: queue index to direct traffic to
- *
- *  Note that the caller to this function must lock before calling, since the
- *  hardware writes must be protected from one another.
- **/
-s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
-                                      union ixgbe_atr_input *input,
-                                      struct ixgbe_atr_input_masks *input_masks,
-                                      u16 soft_id, u8 queue)
+s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+				    union ixgbe_atr_input *input_mask)
 {
-	u32 fdirhash;
-	u32 fdircmd;
-	u32 fdirport, fdirtcpm;
-	u32 fdirvlan;
-	/* start with VLAN, flex bytes, VM pool, and IPv6 destination masked */
-	u32 fdirm = IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP | IXGBE_FDIRM_FLEX |
-		    IXGBE_FDIRM_POOL | IXGBE_FDIRM_DIPv6;
-
-	/*
-	 * Check flow_type formatting, and bail out before we touch the hardware
-	 * if there's a configuration issue
-	 */
-	switch (input->formatted.flow_type) {
-	case IXGBE_ATR_FLOW_TYPE_IPV4:
-		/* use the L4 protocol mask for raw IPv4/IPv6 traffic */
-		fdirm |= IXGBE_FDIRM_L4P;
-	case IXGBE_ATR_FLOW_TYPE_SCTPV4:
-		if (input_masks->dst_port_mask || input_masks->src_port_mask) {
-			hw_dbg(hw, " Error on src/dst port mask\n");
-			return IXGBE_ERR_CONFIG;
-		}
-	case IXGBE_ATR_FLOW_TYPE_TCPV4:
-	case IXGBE_ATR_FLOW_TYPE_UDPV4:
-		break;
-	default:
-		hw_dbg(hw, " Error on flow type input\n");
-		return IXGBE_ERR_CONFIG;
-	}
+	/* mask IPv6 since it is currently not supported */
+	u32 fdirm = IXGBE_FDIRM_DIPv6;
+	u32 fdirtcpm;
 
 	/*
 	 * Program the relevant mask registers.  If src/dst_port or src/dst_addr
@@ -1576,41 +1576,71 @@ s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
 	 * point in time.
 	 */
 
-	/* Program FDIRM */
-	switch (ntohs(input_masks->vlan_id_mask) & 0xEFFF) {
-	case 0xEFFF:
-		/* Unmask VLAN ID - bit 0 and fall through to unmask prio */
-		fdirm &= ~IXGBE_FDIRM_VLANID;
-	case 0xE000:
-		/* Unmask VLAN prio - bit 1 */
-		fdirm &= ~IXGBE_FDIRM_VLANP;
+	/* verify bucket hash is cleared on hash generation */
+	if (input_mask->formatted.bkt_hash)
+		hw_dbg(hw, " bucket hash should always be 0 in mask\n");
+
+	/* Program FDIRM and verify partial masks */
+	switch (input_mask->formatted.vm_pool & 0x7F) {
+	case 0x0:
+		fdirm |= IXGBE_FDIRM_POOL;
+	case 0x7F:
 		break;
-	case 0x0FFF:
-		/* Unmask VLAN ID - bit 0 */
-		fdirm &= ~IXGBE_FDIRM_VLANID;
+	default:
+		hw_dbg(hw, " Error on vm pool mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) {
+	case 0x0:
+		fdirm |= IXGBE_FDIRM_L4P;
+		if (input_mask->formatted.dst_port ||
+		    input_mask->formatted.src_port) {
+			hw_dbg(hw, " Error on src/dst port mask\n");
+			return IXGBE_ERR_CONFIG;
+		}
+	case IXGBE_ATR_L4TYPE_MASK:
 		break;
+	default:
+		hw_dbg(hw, " Error on flow type mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (ntohs(input_mask->formatted.vlan_id) & 0xEFFF) {
 	case 0x0000:
-		/* do nothing, vlans already masked */
+		/* mask VLAN ID, fall through to mask VLAN priority */
+		fdirm |= IXGBE_FDIRM_VLANID;
+	case 0x0FFF:
+		/* mask VLAN priority */
+		fdirm |= IXGBE_FDIRM_VLANP;
+		break;
+	case 0xE000:
+		/* mask VLAN ID only, fall through */
+		fdirm |= IXGBE_FDIRM_VLANID;
+	case 0xEFFF:
+		/* no VLAN fields masked */
 		break;
 	default:
 		hw_dbg(hw, " Error on VLAN mask\n");
 		return IXGBE_ERR_CONFIG;
 	}
 
-	if (input_masks->flex_mask & 0xFFFF) {
-		if ((input_masks->flex_mask & 0xFFFF) != 0xFFFF) {
-			hw_dbg(hw, " Error on flexible byte mask\n");
-			return IXGBE_ERR_CONFIG;
-		}
-		/* Unmask Flex Bytes - bit 4 */
-		fdirm &= ~IXGBE_FDIRM_FLEX;
+	switch (input_mask->formatted.flex_bytes & 0xFFFF) {
+	case 0x0000:
+		/* Mask Flex Bytes, fall through */
+		fdirm |= IXGBE_FDIRM_FLEX;
+	case 0xFFFF:
+		break;
+	default:
+		hw_dbg(hw, " Error on flexible byte mask\n");
+		return IXGBE_ERR_CONFIG;
 	}
 
 	/* Now mask VM pool and destination IPv6 - bits 5 and 2 */
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
 
 	/* store the TCP/UDP port masks, bit reversed from port layout */
-	fdirtcpm = ixgbe_get_fdirtcpm_82599(input_masks);
+	fdirtcpm = ixgbe_get_fdirtcpm_82599(input_mask);
 
 	/* write both the same so that UDP and TCP use the same mask */
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm);
@@ -1618,24 +1648,32 @@ s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
 
 	/* store source and destination IP masks (big-enian) */
 	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M,
-			     ~input_masks->src_ip_mask[0]);
+			     ~input_mask->formatted.src_ip[0]);
 	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M,
-			     ~input_masks->dst_ip_mask[0]);
+			     ~input_mask->formatted.dst_ip[0]);
 
-	/* Apply masks to input data */
-	input->formatted.vlan_id &= input_masks->vlan_id_mask;
-	input->formatted.flex_bytes &= input_masks->flex_mask;
-	input->formatted.src_port &= input_masks->src_port_mask;
-	input->formatted.dst_port &= input_masks->dst_port_mask;
-	input->formatted.src_ip[0] &= input_masks->src_ip_mask[0];
-	input->formatted.dst_ip[0] &= input_masks->dst_ip_mask[0];
+	return 0;
+}
 
-	/* record vlan (little-endian) and flex_bytes(big-endian) */
-	fdirvlan =
-		IXGBE_STORE_AS_BE16(ntohs(input->formatted.flex_bytes));
-	fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
-	fdirvlan |= ntohs(input->formatted.vlan_id);
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
+s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id, u8 queue)
+{
+	u32 fdirport, fdirvlan, fdirhash, fdircmd;
+
+	/* currently IPv6 is not supported, must be programmed with 0 */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0),
+			     input->formatted.src_ip[0]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(1),
+			     input->formatted.src_ip[1]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(2),
+			     input->formatted.src_ip[2]);
+
+	/* record the source address (big-endian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]);
+
+	/* record the first 32 bits of the destination address (big-endian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
 
 	/* record source and destination port (little-endian)*/
 	fdirport = ntohs(input->formatted.dst_port);
@@ -1643,29 +1681,80 @@ s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
 	fdirport |= ntohs(input->formatted.src_port);
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport);
 
-	/* record the first 32 bits of the destination address (big-endian) */
-	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
+	/* record vlan (little-endian) and flex_bytes(big-endian) */
+	fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes);
+	fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
+	fdirvlan |= ntohs(input->formatted.vlan_id);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
 
-	/* record the source address (big-endian) */
-	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]);
+	/* configure FDIRHASH register */
+	fdirhash = input->formatted.bkt_hash;
+	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+	/*
+	 * flush all previous writes to make certain registers are
+	 * programmed prior to issuing the command
+	 */
+	IXGBE_WRITE_FLUSH(hw);
 
 	/* configure FDIRCMD register */
 	fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
 		  IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+	if (queue == IXGBE_FDIR_DROP_QUEUE)
+		fdircmd |= IXGBE_FDIRCMD_DROP;
 	fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
 	fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+	fdircmd |= (u32)input->formatted.vm_pool << IXGBE_FDIRCMD_VT_POOL_SHIFT;
 
-	/* we only want the bucket hash so drop the upper 16 bits */
-	fdirhash = ixgbe_atr_compute_hash_82599(input,
-						IXGBE_ATR_BUCKET_HASH_KEY);
-	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
-
-	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd);
 
 	return 0;
 }
 
+s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id)
+{
+	u32 fdirhash;
+	u32 fdircmd = 0;
+	u32 retry_count;
+	s32 err = 0;
+
+	/* configure FDIRHASH register */
+	fdirhash = input->formatted.bkt_hash;
+	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+	/* flush hash to HW */
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Query if filter is present */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, IXGBE_FDIRCMD_CMD_QUERY_REM_FILT);
+
+	for (retry_count = 10; retry_count; retry_count--) {
+		/* allow 10us for query to process */
+		udelay(10);
+		/* verify query completed successfully */
+		fdircmd = IXGBE_READ_REG(hw, IXGBE_FDIRCMD);
+		if (!(fdircmd & IXGBE_FDIRCMD_CMD_MASK))
+			break;
+	}
+
+	if (!retry_count)
+		err = IXGBE_ERR_FDIR_REINIT_FAILED;
+
+	/* if filter exists in hardware then remove it */
+	if (fdircmd & IXGBE_FDIRCMD_FILTER_VALID) {
+		IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+		IXGBE_WRITE_FLUSH(hw);
+		IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+				IXGBE_FDIRCMD_CMD_REMOVE_FLOW);
+	}
+
+	return err;
+}
+
 /**
  *  ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register
  *  @hw: pointer to hardware structure
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 5483b9c..e177b5d 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -5130,7 +5130,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 		adapter->atr_sample_rate = 20;
 		adapter->ring_feature[RING_F_FDIR].indices =
 							 IXGBE_MAX_FDIR_INDICES;
-		adapter->fdir_pballoc = 0;
+		adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_64K;
 #ifdef IXGBE_FCOE
 		adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE;
 		adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index 9a499a6..8b1abd4 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -2056,9 +2056,10 @@ enum {
 #define IXGBE_VFLREC(_i)                 (0x00700 + (_i * 4))
 
 enum ixgbe_fdir_pballoc_type {
-	IXGBE_FDIR_PBALLOC_64K = 0,
-	IXGBE_FDIR_PBALLOC_128K,
-	IXGBE_FDIR_PBALLOC_256K,
+	IXGBE_FDIR_PBALLOC_NONE = 0,
+	IXGBE_FDIR_PBALLOC_64K  = 1,
+	IXGBE_FDIR_PBALLOC_128K = 2,
+	IXGBE_FDIR_PBALLOC_256K = 3,
 };
 #define IXGBE_FDIR_PBALLOC_SIZE_SHIFT           16
 
@@ -2112,7 +2113,7 @@ enum ixgbe_fdir_pballoc_type {
 #define IXGBE_FDIRCMD_CMD_ADD_FLOW              0x00000001
 #define IXGBE_FDIRCMD_CMD_REMOVE_FLOW           0x00000002
 #define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT        0x00000003
-#define IXGBE_FDIRCMD_CMD_QUERY_REM_HASH        0x00000007
+#define IXGBE_FDIRCMD_FILTER_VALID              0x00000004
 #define IXGBE_FDIRCMD_FILTER_UPDATE             0x00000008
 #define IXGBE_FDIRCMD_IPv6DMATCH                0x00000010
 #define IXGBE_FDIRCMD_L4TYPE_UDP                0x00000020
@@ -2131,6 +2132,8 @@ enum ixgbe_fdir_pballoc_type {
 #define IXGBE_FDIR_INIT_DONE_POLL               10
 #define IXGBE_FDIRCMD_CMD_POLL                  10
 
+#define IXGBE_FDIR_DROP_QUEUE                   127
+
 /* Manageablility Host Interface defines */
 #define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH       1792 /* Num of bytes in range */
 #define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH      448 /* Num of dwords in range */
@@ -2350,7 +2353,7 @@ union ixgbe_atr_input {
 	 * src_port   - 2 bytes
 	 * dst_port   - 2 bytes
 	 * flex_bytes - 2 bytes
-	 * rsvd0      - 2 bytes - space reserved must be 0.
+	 * bkt_hash   - 2 bytes
 	 */
 	struct {
 		u8     vm_pool;
@@ -2361,7 +2364,7 @@ union ixgbe_atr_input {
 		__be16 src_port;
 		__be16 dst_port;
 		__be16 flex_bytes;
-		__be16 rsvd0;
+		__be16 bkt_hash;
 	} formatted;
 	__be32 dword_stream[11];
 };
@@ -2382,16 +2385,6 @@ union ixgbe_atr_hash_dword {
 	__be32 dword;
 };
 
-struct ixgbe_atr_input_masks {
-	__be16 rsvd0;
-	__be16 vlan_id_mask;
-	__be32 dst_ip_mask[4];
-	__be32 src_ip_mask[4];
-	__be16 src_port_mask;
-	__be16 dst_port_mask;
-	__be16 flex_mask;
-};
-
 enum ixgbe_eeprom_type {
 	ixgbe_eeprom_uninitialized = 0,
 	ixgbe_eeprom_spi,
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 04/11] ixgbe: add basic support for setting and getting nfc controls
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change adds basic support for the obtaining of RSS ring counts.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |   19 +++++++++++++++++++
 1 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index a2d8ed5..8373244 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2336,6 +2336,24 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	return 0;
 }
 
+static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			   void *rule_locs)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_rx_queues;
+		ret = 0;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.get_settings           = ixgbe_get_settings,
 	.set_settings           = ixgbe_set_settings,
@@ -2371,6 +2389,7 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 	.set_coalesce           = ixgbe_set_coalesce,
 	.get_flags              = ethtool_op_get_flags,
 	.set_flags              = ixgbe_set_flags,
+	.get_rxnfc		= ixgbe_get_rxnfc,
 };
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
-- 
1.7.5.4


^ permalink raw reply related

* [net-next 02/11] ixgbe: fix flags relating to perfect filters to support coexistence
From: Jeff Kirsher @ 2011-06-24  6:03 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, Jeff Kirsher
In-Reply-To: <1308895397-23133-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

I am removing the requirement that Ntuple filters have the same
number of queues and requirements as ATR.  As a result this change will
make it so that all the Ntuple flag does is disable ATR for now.

This change fixes an issue in which we were incorrectly re-enabling ATR
when we exited perfect filter mode.  This was due to the fact that the
logic assumed RSS and DCB were mutually exclusive which is no longer the
case.

To correct this we just need to add a check to guarantee DCB is disabled
before re-enabling ATR.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_dcb_nl.c  |   13 ++++++-------
 drivers/net/ixgbe/ixgbe_ethtool.c |   24 ++++++++++++------------
 drivers/net/ixgbe/ixgbe_main.c    |   34 ++++++++++++----------------------
 3 files changed, 30 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index 08c7aeb..bd2d752 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -114,11 +114,12 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 	u8 err = 0;
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
+	/* verify there is something to do, if not then exit */
+	if (!!state != !(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+		return err;
+
 	if (state > 0) {
 		/* Turn on DCB */
-		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
-			goto out;
-
 		if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
 			e_err(drv, "Enable failed, needs MSI-X\n");
 			err = 1;
@@ -143,9 +144,6 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 		ixgbe_setup_tc(netdev, MAX_TRAFFIC_CLASS);
 	} else {
 		/* Turn off DCB */
-		if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
-			goto out;
-
 		adapter->hw.fc.requested_mode = adapter->last_lfc_mode;
 		adapter->temp_dcb_cfg.pfc_mode_enable = false;
 		adapter->dcb_cfg.pfc_mode_enable = false;
@@ -153,7 +151,8 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 		switch (adapter->hw.mac.type) {
 		case ixgbe_mac_82599EB:
 		case ixgbe_mac_X540:
-			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
+			if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+				adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
 			break;
 		default:
 			break;
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index e41dd24..a2d8ed5 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2310,20 +2310,20 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	 * Check if Flow Director n-tuple support was enabled or disabled.  If
 	 * the state changed, we need to reset.
 	 */
-	if ((adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) &&
-	    (!(data & ETH_FLAG_NTUPLE))) {
-		/* turn off Flow Director perfect, set hash and reset */
+	if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
+		/* turn off ATR, enable perfect filters and reset */
+		if (data & ETH_FLAG_NTUPLE) {
+			adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+			adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+			need_reset = true;
+		}
+	} else if (!(data & ETH_FLAG_NTUPLE)) {
+		/* turn off Flow Director, set ATR and reset */
 		adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
-		adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
-		need_reset = true;
-	} else if ((!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) &&
-	           (data & ETH_FLAG_NTUPLE)) {
-		/* turn off Flow Director hash, enable perfect and reset */
-		adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
-		adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+		if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
+		    !(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
 		need_reset = true;
-	} else {
-		/* no state change */
 	}
 
 	if (need_reset) {
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 4cd66ae..5483b9c 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1555,9 +1555,8 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
 			q_vector->eitr = adapter->rx_eitr_param;
 
 		ixgbe_write_eitr(q_vector);
-		/* If Flow Director is enabled, set interrupt affinity */
-		if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
-		    (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
+		/* If ATR is enabled, set interrupt affinity */
+		if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
 			/*
 			 * Allocate the affinity_hint cpumask, assign the mask
 			 * for this vector, and set our affinity_hint for
@@ -2468,8 +2467,7 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues,
 	default:
 		break;
 	}
-	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE ||
-	    adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)
 		mask |= IXGBE_EIMS_FLOW_DIR;
 
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
@@ -3767,8 +3765,6 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 			adapter->tx_ring[i]->atr_sample_rate =
 						       adapter->atr_sample_rate;
 		ixgbe_init_fdir_signature_82599(hw, adapter->fdir_pballoc);
-	} else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
-		ixgbe_init_fdir_perfect_82599(hw, adapter->fdir_pballoc);
 	}
 	ixgbe_configure_virtualization(adapter);
 
@@ -4334,15 +4330,13 @@ static inline bool ixgbe_set_fdir_queues(struct ixgbe_adapter *adapter)
 	f_fdir->mask = 0;
 
 	/* Flow Director must have RSS enabled */
-	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED &&
-	    ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE ||
-	     (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)))) {
+	if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
+	    (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)) {
 		adapter->num_tx_queues = f_fdir->indices;
 		adapter->num_rx_queues = f_fdir->indices;
 		ret = true;
 	} else {
 		adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
-		adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
 	}
 	return ret;
 }
@@ -4372,12 +4366,12 @@ static inline bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter)
 
 	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
 		e_info(probe, "FCoE enabled with RSS\n");
-		if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
-		    (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+		if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)
 			ixgbe_set_fdir_queues(adapter);
 		else
 			ixgbe_set_rss_queues(adapter);
 	}
+
 	/* adding FCoE rx rings to the end */
 	f->mask = adapter->num_rx_queues;
 	adapter->num_rx_queues += f->indices;
@@ -4670,9 +4664,8 @@ static inline bool ixgbe_cache_ring_fdir(struct ixgbe_adapter *adapter)
 	int i;
 	bool ret = false;
 
-	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED &&
-	    ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
-	     (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))) {
+	if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
+	    (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)) {
 		for (i = 0; i < adapter->num_rx_queues; i++)
 			adapter->rx_ring[i]->reg_idx = i;
 		for (i = 0; i < adapter->num_tx_queues; i++)
@@ -4701,8 +4694,7 @@ static inline bool ixgbe_cache_ring_fcoe(struct ixgbe_adapter *adapter)
 		return false;
 
 	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
-		if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
-		    (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+		if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)
 			ixgbe_cache_ring_fdir(adapter);
 		else
 			ixgbe_cache_ring_rss(adapter);
@@ -4882,14 +4874,12 @@ static int ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
 
 	adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
 	adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
-	if (adapter->flags & (IXGBE_FLAG_FDIR_HASH_CAPABLE |
-			      IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
 		e_err(probe,
-		      "Flow Director is not supported while multiple "
+		      "ATR is not supported while multiple "
 		      "queues are disabled.  Disabling Flow Director\n");
 	}
 	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
-	adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
 	adapter->atr_sample_rate = 0;
 	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
 		ixgbe_disable_sriov(adapter);
-- 
1.7.5.4


^ permalink raw reply related

* [RFC PATCH 1/1] BPF JIT for PPC64
From: Matt Evans @ 2011-06-24  6:02 UTC (permalink / raw)
  To: netdev, linuxppc-dev


 arch/powerpc/Kconfig                  |    1 +
 arch/powerpc/Makefile                 |    3 +-
 arch/powerpc/include/asm/ppc-opcode.h |   40 ++
 arch/powerpc/net/Makefile             |    4 +
 arch/powerpc/net/bpf_jit.S            |  138 +++++++
 arch/powerpc/net/bpf_jit.h            |  226 +++++++++++
 arch/powerpc/net/bpf_jit_comp.c       |  697 +++++++++++++++++++++++++++++++++
 7 files changed, 1108 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2729c66..39860fc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -134,6 +134,7 @@ config PPC
 	select GENERIC_IRQ_SHOW_LEVEL
 	select HAVE_RCU_TABLE_FREE if SMP
 	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_BPF_JIT if PPC64
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index b7212b6..b94740f 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -154,7 +154,8 @@ core-y				+= arch/powerpc/kernel/ \
 				   arch/powerpc/lib/ \
 				   arch/powerpc/sysdev/ \
 				   arch/powerpc/platforms/ \
-				   arch/powerpc/math-emu/
+				   arch/powerpc/math-emu/ \
+				   arch/powerpc/net/
 core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
 core-$(CONFIG_KVM) 		+= arch/powerpc/kvm/
 
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index e472659..e980faa 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -71,6 +71,42 @@
 #define PPC_INST_ERATSX			0x7c000126
 #define PPC_INST_ERATSX_DOT		0x7c000127
 
+/* Misc instructions for BPF compiler */
+#define PPC_INST_LD			0xe8000000
+#define PPC_INST_LHZ			0xa0000000
+#define PPC_INST_LWZ			0x80000000
+#define PPC_INST_STD			0xf8000000
+#define PPC_INST_STDU			0xf8000001
+#define PPC_INST_MFLR			0x7c0802a6
+#define PPC_INST_MTLR			0x7c0803a6
+#define PPC_INST_CMPWI			0x2c000000
+#define PPC_INST_CMPDI			0x2c200000
+#define PPC_INST_CMPLW			0x7c000040
+#define PPC_INST_CMPLWI			0x28000000
+#define PPC_INST_ADDI			0x38000000
+#define PPC_INST_ADDIS			0x3c000000
+#define PPC_INST_ADD			0x7c000214
+#define PPC_INST_SUB			0x7c000050
+#define PPC_INST_BLR			0x4e800020
+#define PPC_INST_BLRL			0x4e800021
+#define PPC_INST_MULLW			0x7c0001d6
+#define PPC_INST_MULHWU			0x7c000016
+#define PPC_INST_MULLI			0x1c000000
+#define PPC_INST_DIVWU			0x7c0003d6
+#define PPC_INST_RLWINM			0x54000000
+#define PPC_INST_RLDICR			0x78000004
+#define PPC_INST_SLW			0x7c000030
+#define PPC_INST_SRW			0x7c000430
+#define PPC_INST_AND			0x7c000038
+#define PPC_INST_ANDDOT			0x7c000039
+#define PPC_INST_OR			0x7c000378
+#define PPC_INST_ANDI			0x70000000
+#define PPC_INST_ORI			0x60000000
+#define PPC_INST_ORIS			0x64000000
+#define PPC_INST_NEG			0x7c0000d0
+#define PPC_INST_BRANCH			0x48000000
+#define PPC_INST_BRANCH_COND		0x40800000
+
 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
@@ -83,6 +119,10 @@
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
 #define __PPC_WS(w)	(((w) & 0x1f) << 11)
+#define __PPC_SH(s)	__PPC_WS(s)
+#define __PPC_MB(s)	(((s) & 0x1f) << 6)
+#define __PPC_ME(s)	(((s) & 0x1f) << 1)
+#define __PPC_BI(s)	(((s) & 0x1f) << 16)
 
 /*
  * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
new file mode 100644
index 0000000..90568c3
--- /dev/null
+++ b/arch/powerpc/net/Makefile
@@ -0,0 +1,4 @@
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/powerpc/net/bpf_jit.S b/arch/powerpc/net/bpf_jit.S
new file mode 100644
index 0000000..ce2225e
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit.S
@@ -0,0 +1,138 @@
+/* bpf_jit.S: Packet/header access helper functions
+ * for PPC64 BPF compiler.
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <asm/ppc_asm.h>
+#include "bpf_jit.h"
+
+/*
+ * All of these routines are called directly from generated code,
+ * whose register usage is:
+ *
+ * r3		skb
+ * r4,r5	A,X
+ * r6		*** address parameter to helper ***
+ * r7-r10	scratch
+ * r14		skb->data
+ * r15		skb headlen
+ * r16-31	M[]
+ */
+
+/*
+ * To consider:	 These helpers are so small it may be better to just
+ * generate them inline.  They're out here for debug at the moment,
+ * but inline code can do the simple headlen check then branch
+ * directly to slow_path_XXX if required.  (In fact, could load a spare
+ * GPR with slow_path_generic and pass size as an argument.)
+ *
+ * Technically, the "is addr < 0" check is unnecessary & slowing down
+ * the ABS path, as it's statically checked on generation.
+ */
+	.globl	sk_load_word
+sk_load_word:
+	cmpdi	r_addr, 0
+	blt	bpf_error
+	/* Are we accessing past headlen? */
+	subi	r_scratch1, r_HL, 4
+	cmpd	r_scratch1, r_addr
+	blt	bpf_slow_path_word
+	/* Nope, just hitting the header.  cr0 here is eq or gt! */
+	lwzx	r_A, r_D, r_addr
+	/* When big endian we don't need to byteswap. */
+	blr	/* Return success, cr0 != LT */
+
+	.globl	sk_load_half
+sk_load_half:
+	cmpdi	r_addr, 0
+	blt	bpf_error
+	subi	r_scratch1, r_HL, 2
+	cmpd	r_scratch1, r_addr
+	blt	bpf_slow_path_half
+	lhzx	r_A, r_D, r_addr
+	blr
+
+	.globl	sk_load_byte
+sk_load_byte:
+	cmpdi	r_addr, 0
+	blt	bpf_error
+	cmpd	r_HL, r_addr
+	ble	bpf_slow_path_byte
+	lbzx	r_A, r_D, r_addr
+	blr
+
+/*
+ * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
+ * r_addr is the offset value, already known positive
+ */
+	.globl sk_load_byte_msh
+sk_load_byte_msh:
+	cmpd	r_HL, r_addr
+	ble	bpf_slow_path_byte_msh
+	lbzx	r_X, r_D, r_addr
+	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
+	blr
+
+bpf_error:
+	/* Entered with cr0 = lt */
+	li	r3, 0
+	/* Generated code will 'blt epilogue', returning 0. */
+	blr
+
+/* Call out to skb_copy_bits:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define bpf_slow_path_common(SIZE)				\
+	mflr	r0;						\
+	std	r0, 16(r1);					\
+	/* R3 goes in parameter space of caller's frame */	\
+	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	stdu	r1, -128(r1);					\
+	/* R3 = r_skb, as passed */				\
+	mr	r4, r_addr;					\
+	addi	r5, r1, 128+BPF_PPC_STACK_BASIC+(2*8);		\
+	li	r6, SIZE;					\
+	bl	skb_copy_bits;					\
+	/* R3 = 0 on success */					\
+	addi	r1, r1, 128;					\
+	ld	r0, 16(r1);					\
+	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	mtlr	r0;						\
+	cmpdi	r3, 0;						\
+	blt	bpf_error;	/* cr0 = LT */			\
+	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	/* Great success! */
+
+bpf_slow_path_word:
+	bpf_slow_path_common(4)
+	/* Data value is on stack, and cr0 != LT */
+	lwz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
+	blr
+
+bpf_slow_path_half:
+	bpf_slow_path_common(2)
+	lhz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
+	blr
+
+bpf_slow_path_byte:
+	bpf_slow_path_common(1)
+	lbz	r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
+	blr
+
+bpf_slow_path_byte_msh:
+	bpf_slow_path_common(1)
+	lbz	r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
+	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
+	blr
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
new file mode 100644
index 0000000..1fe54b9
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit.h
@@ -0,0 +1,226 @@
+/* bpf_jit.h: BPF JIT compiler for PPC64
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _BPF_JIT_H
+#define _BPF_JIT_H
+
+#define BPF_PPC_STACK_LOCALS	32
+#define BPF_PPC_STACK_BASIC	112
+#define BPF_PPC_STACK_SAVE	128
+#define BPF_PPC_STACKFRAME	(BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
+				 BPF_PPC_STACK_SAVE)
+
+/*
+ * Generated code register usage:
+ *
+ * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with:
+ *
+ * skb		r3	(Entry parameter)
+ * A register	r4
+ * X register	r5
+ * addr param	r6
+ * r7-r10	scratch
+ * skb->data	r14
+ * skb headlen	r15	(skb->len - skb->data_len)
+ * m[0]		r16
+ * m[...]	...
+ * m[15]	r31
+ */
+#define r_skb		3
+#define r_ret		3
+#define r_A		4
+#define r_X		5
+#define r_addr		6
+#define r_scratch1	7
+#define r_D		14
+#define r_HL		15
+#define r_M		16
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Assembly helpers from arch/powerpc/net/bpf_jit.S:
+ */
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+
+#define FUNCTION_DESCR_SIZE	24
+
+/*
+ * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
+ * (e.g. LD, ADDI).  If the bottom 16 bits is "-ve", add another bit into the
+ * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000).
+ */
+#define IMM_H(i)		((uintptr_t)(i)>>16)
+#define IMM_HA(i)		(((uintptr_t)(i)>>16) +			      \
+				 (((uintptr_t)(i) & 0x8000) >> 15))
+#define IMM_L(i)		((uintptr_t)(i) & 0xffff)
+
+#define PLANT_INSTR(d, idx, instr)					      \
+	do { if (d) { (d)[idx] = instr; } idx++; } while (0)
+#define EMIT(instr)		PLANT_INSTR(image, ctx->idx, instr)
+
+#define PPC_NOP()		EMIT(PPC_INST_NOP)
+#define PPC_BLR()		EMIT(PPC_INST_BLR)
+#define PPC_BLRL()		EMIT(PPC_INST_BLRL)
+#define PPC_MTLR(r)		EMIT(PPC_INST_MTLR | __PPC_RT(r))
+#define PPC_ADDI(d, a, i)	EMIT(PPC_INST_ADDI | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | IMM_L(i))
+#define PPC_MR(d, a)		PPC_OR(d, a, a)
+#define PPC_LI(r, i)		PPC_ADDI(r, 0, i)
+#define PPC_ADDIS(d, a, i)	EMIT(PPC_INST_ADDIS |			      \
+				     __PPC_RS(d) | __PPC_RA(a) | IMM_L(i))
+#define PPC_LIS(r, i)		PPC_ADDIS(r, 0, i)
+#define PPC_STD(r, base, i)	EMIT(PPC_INST_STD | __PPC_RS(r) |	      \
+				     __PPC_RA(base) | ((i) & 0xfffc))
+
+#define PPC_LD(r, base, i)	EMIT(PPC_INST_LD | __PPC_RT(r) |	      \
+				     __PPC_RA(base) | IMM_L(i))
+#define PPC_LWZ(r, base, i)	EMIT(PPC_INST_LWZ | __PPC_RT(r) |	      \
+				     __PPC_RA(base) | IMM_L(i))
+#define PPC_LHZ(r, base, i)	EMIT(PPC_INST_LHZ | __PPC_RT(r) |	      \
+				     __PPC_RA(base) | IMM_L(i))
+/* Convenience helpers for the above with 'far' offsets: */
+#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i);     \
+		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
+			PPC_LD(r, r, IMM_L(i)); } } while(0)
+
+#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i);   \
+		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
+			PPC_LWZ(r, r, IMM_L(i)); } } while(0)
+
+#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i);   \
+		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
+			PPC_LHZ(r, r, IMM_L(i)); } } while(0)
+
+#define PPC_CMPWI(a, i)		EMIT(PPC_INST_CMPWI | __PPC_RA(a) | IMM_L(i))
+#define PPC_CMPDI(a, i)		EMIT(PPC_INST_CMPDI | __PPC_RA(a) | IMM_L(i))
+#define PPC_CMPLWI(a, i)	EMIT(PPC_INST_CMPLWI | __PPC_RA(a) | IMM_L(i))
+#define PPC_CMPLW(a, b)		EMIT(PPC_INST_CMPLW | __PPC_RA(a) | __PPC_RB(b))
+
+#define PPC_SUB(d, a, b)	EMIT(PPC_INST_SUB | __PPC_RT(d) |	      \
+				     __PPC_RB(a) | __PPC_RA(b))
+#define PPC_ADD(d, a, b)	EMIT(PPC_INST_ADD | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | __PPC_RB(b))
+#define PPC_MUL(d, a, b)	EMIT(PPC_INST_MULLW | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | __PPC_RB(b))
+#define PPC_MULHWU(d, a, b)	EMIT(PPC_INST_MULHWU | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | __PPC_RB(b))
+#define PPC_MULI(d, a, i)	EMIT(PPC_INST_MULLI | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | IMM_L(i))
+#define PPC_DIVWU(d, a, b)	EMIT(PPC_INST_DIVWU | __PPC_RT(d) |	      \
+				     __PPC_RA(a) | __PPC_RB(b))
+#define PPC_AND(d, a, b)	EMIT(PPC_INST_AND | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(b))
+#define PPC_ANDI(d, a, i)	EMIT(PPC_INST_ANDI | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | IMM_L(i))
+#define PPC_AND_DOT(d, a, b)	EMIT(PPC_INST_ANDDOT | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(b))
+#define PPC_OR(d, a, b)		EMIT(PPC_INST_OR | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(b))
+#define PPC_ORI(d, a, i)	EMIT(PPC_INST_ORI | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | IMM_L(i))
+#define PPC_ORIS(d, a, i)	EMIT(PPC_INST_ORIS | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | IMM_L(i))
+#define PPC_SLW(d, a, s)	EMIT(PPC_INST_SLW | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(s))
+#define PPC_SRW(d, a, s)	EMIT(PPC_INST_SRW | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_RB(s))
+/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
+#define PPC_SLWI(d, a, i)	EMIT(PPC_INST_RLWINM | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_SH(i) |	      \
+				     __PPC_MB(0) | __PPC_ME(31-(i)))
+/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
+#define PPC_SRWI(d, a, i)	EMIT(PPC_INST_RLWINM | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_SH(32-(i)) |	      \
+				     __PPC_MB(i) | __PPC_ME(31))
+/* sldi = rldicr Rx, Ry, n, 63-n */
+#define PPC_SLDI(d, a, i)	EMIT(PPC_INST_RLDICR | __PPC_RA(d) |	      \
+				     __PPC_RS(a) | __PPC_SH(i) |	      \
+				     __PPC_MB(63-(i)) | (((i) & 0x20) >> 4))
+#define PPC_NEG(d, a)		EMIT(PPC_INST_NEG | __PPC_RT(d) | __PPC_RA(a))
+
+/* Long jump; (unconditional 'branch') */
+#define PPC_JMP(dest)		EMIT(PPC_INST_BRANCH |			      \
+				     (((dest) - (ctx->idx * 4)) & 0x03fffffc))
+/* "cond" here covers BO:BI fields. */
+#define PPC_BCC_SHORT(cond, dest)	EMIT(PPC_INST_BRANCH_COND |	      \
+					     (((cond) & 0x3ff) << 16) |	      \
+					     (((dest) - (ctx->idx * 4)) &     \
+					      0xfffc))
+#define PPC_LI32(d, i)		do { PPC_LI(d, IMM_L(i));		      \
+		if ((u32)(uintptr_t)(i) >= 32768) {			      \
+			PPC_ADDIS(d, d, IMM_HA(i));			      \
+		} } while(0)
+#define PPC_LI64(d, i)		do {					      \
+		if (!((uintptr_t)(i) & 0xffffffff00000000ULL))		      \
+			PPC_LI32(d, i);					      \
+		else {							      \
+			PPC_LIS(d, ((uintptr_t)(i) >> 48));		      \
+			if ((uintptr_t)(i) & 0x0000ffff00000000ULL)	      \
+				PPC_ORI(d, d,				      \
+					((uintptr_t)(i) >> 32) & 0xffff);     \
+			PPC_SLDI(d, d, 32);				      \
+			if ((uintptr_t)(i) & 0x00000000ffff0000ULL)	      \
+				PPC_ORIS(d, d,				      \
+					 ((uintptr_t)(i) >> 16) & 0xffff);    \
+			if ((uintptr_t)(i) & 0x000000000000ffffULL)	      \
+				PPC_ORI(d, d, (uintptr_t)(i) & 0xffff);	      \
+		} } while (0);
+
+static inline bool is_nearbranch(int offset)
+{
+	return (offset < 32768) && (offset >= -32768);
+}
+
+/*
+ * The fly in the ointment of code size changing from pass to pass is
+ * avoided by padding the short branch case with a NOP.	 If code size differs
+ * with different branch reaches we will have the issue of code moving from
+ * one pass to the next and will need a few passes to converge on a stable
+ * state.
+ */
+#define PPC_BCC(cond, dest)	do {					      \
+		if (is_nearbranch((dest) - (ctx->idx * 4))) {		      \
+			PPC_BCC_SHORT(cond, dest);			      \
+			PPC_NOP();					      \
+		} else {						      \
+			/* Flip the 'T or F' bit to invert comparison */      \
+			PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4);  \
+			PPC_JMP(dest);					      \
+		} } while(0)
+
+/* To create a branch condition, select a bit of cr0... */
+#define CR0_LT		0
+#define CR0_GT		1
+#define CR0_EQ		2
+/* ...and modify BO[3] */
+#define COND_CMP_TRUE	0x100
+#define COND_CMP_FALSE	0x000
+/* Together, they make all required comparisons: */
+#define COND_GT		(CR0_GT | COND_CMP_TRUE)
+#define COND_GE		(CR0_LT | COND_CMP_FALSE)
+#define COND_EQ		(CR0_EQ | COND_CMP_TRUE)
+#define COND_NE		(CR0_EQ | COND_CMP_FALSE)
+#define COND_LT		(CR0_LT | COND_CMP_TRUE)
+
+#define SEEN_DATAREF 0x10000 /* might call external helpers */
+#define SEEN_XREG    0x20000 /* X reg is used */
+#define SEEN_MEM     0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
+			      * storage */
+#define SEEN_MEM_MSK 0x0ffff
+
+struct codegen_context {
+	unsigned int seen;
+	unsigned int idx;
+	int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
+};
+
+#endif
+
+#endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
new file mode 100644
index 0000000..8a76350
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -0,0 +1,697 @@
+/* bpf_jit_comp.c: BPF JIT compiler for PPC64
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ *
+ * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include "bpf_jit.h"
+
+#ifndef __BIG_ENDIAN
+/* There are endianness assumptions herein. */
+#error "Little-endian PPC not supported in BPF compiler"
+#endif
+
+int bpf_jit_enable __read_mostly;
+
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	smp_wmb();
+	flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
+				   struct codegen_context *ctx)
+{
+	int i;
+	const struct sock_filter *filter = fp->insns;
+
+	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
+		/* Make stackframe */
+		if (ctx->seen & SEEN_DATAREF) {
+			/* If we call any helpers (for loads), save LR */
+			EMIT(PPC_INST_MFLR | __PPC_RT(0));
+			PPC_STD(0, 1, 16);
+
+			/* Back up non-volatile regs. */
+			PPC_STD(r_D, 1, -(8*(32-r_D)));
+			PPC_STD(r_HL, 1, -(8*(32-r_HL)));
+		}
+		if (ctx->seen & SEEN_MEM) {
+			/*
+			 * Conditionally save regs r15-r31 as some will be used
+			 * for M[] data.
+			 */
+			for (i = 0; i < 16; i++) {
+				if (ctx->seen & (1 << i))
+					PPC_STD(r_M + i, 1, -128 + (8*i));
+			}
+		}
+		EMIT(PPC_INST_STDU | __PPC_RS(1) | __PPC_RA(1) |
+		     (-BPF_PPC_STACKFRAME & 0xfffc));
+	}
+
+	if (ctx->seen & SEEN_DATAREF) {
+		/*
+		 * If this filter needs to access skb data,
+		 * prepare r_D and r_HL:
+		 *  r_HL = skb->len - skb->data_len
+		 *  r_D	 = skb->data
+		 */
+		PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
+							 data_len));
+		PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
+		PPC_SUB(r_HL, r_HL, r_scratch1);
+		PPC_LD_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
+	}
+
+	if (ctx->seen & SEEN_XREG) {
+		/*
+		 * TODO: Could also detect whether first instr. sets X and
+		 * avoid this (as below, with A).
+		 */
+		PPC_LI(r_X, 0);
+	}
+
+	switch (filter[0].code) {
+	case BPF_S_RET_K:
+	case BPF_S_LD_W_LEN:
+	case BPF_S_ANC_PROTOCOL:
+	case BPF_S_ANC_IFINDEX:
+	case BPF_S_ANC_MARK:
+	case BPF_S_ANC_RXHASH:
+	case BPF_S_ANC_CPU:
+	case BPF_S_ANC_QUEUE:
+	case BPF_S_LD_W_ABS:
+	case BPF_S_LD_H_ABS:
+	case BPF_S_LD_B_ABS:
+		/* first instruction sets A register (or is RET 'constant') */
+		break;
+	default:
+		/* make sure we dont leak kernel information to user */
+		PPC_LI(r_A, 0);
+	}
+}
+
+static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	int i;
+
+	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
+		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
+		if (ctx->seen & SEEN_DATAREF) {
+			PPC_LD(0, 1, 16);
+			PPC_MTLR(0);
+			PPC_LD(r_D, 1, -(8*(32-r_D)));
+			PPC_LD(r_HL, 1, -(8*(32-r_HL)));
+		}
+		if (ctx->seen & SEEN_MEM) {
+			/* Restore any saved non-vol registers */
+			for (i = 0; i < 16; i++) {
+				if (ctx->seen & (1 << i))
+					PPC_LD(r_M + i, 1, -128 + (8*i));
+			}
+		}
+	}
+	/* The RETs have left a return value in R3. */
+
+	PPC_BLR();
+}
+
+/* Assemble the body code between the prologue & epilogue. */
+static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
+			      struct codegen_context *ctx,
+			      unsigned int *addrs)
+{
+	const struct sock_filter *filter = fp->insns;
+	int flen = fp->len;
+	u8 *func;
+	unsigned int true_cond;
+	int i;
+
+	/* Start of epilogue code */
+	unsigned int exit_addr = addrs[flen];
+
+	for (i = 0; i < flen; i++) {
+		unsigned int K = filter[i].k;
+
+		/*
+		 * addrs[] maps a BPF bytecode address into a real offset from
+		 * the start of the body code.
+		 */
+		addrs[i] = ctx->idx * 4;
+
+		switch (filter[i].code) {
+			/*** ALU ops ***/
+		case BPF_S_ALU_ADD_X: /* A += X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_ADD(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_ADD_K: /* A += K; */
+			if (!K)
+				break;
+			if (K < 32768)
+				PPC_ADDI(r_A, r_A, K);
+			else
+				PPC_ADDI(r_A, r_A, IMM_L(K));
+				PPC_ADDIS(r_A, r_A, IMM_HA(K));
+			break;
+		case BPF_S_ALU_SUB_X: /* A -= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_SUB(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_SUB_K: /* A -= K */
+			if (!K)
+				break;
+			if (K < 32768)
+				PPC_ADDI(r_A, r_A, -K);
+			else
+				PPC_ADDI(r_A, r_A, IMM_L(-K));
+				PPC_ADDIS(r_A, r_A, IMM_HA(-K));
+			break;
+		case BPF_S_ALU_MUL_X: /* A *= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_MUL(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_MUL_K: /* A *= K */
+			if (K < 32768)
+				PPC_MULI(r_A, r_A, K);
+			else {
+				PPC_LI32(r_scratch1, K);
+				PPC_MUL(r_A, r_A, r_scratch1);
+			}
+			break;
+		case BPF_S_ALU_DIV_X: /* A /= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_CMPWI(r_X, 0);
+			if (ctx->pc_ret0 != -1) {
+				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
+			} else {
+				/*
+				 * Exit, returning 0; first pass hits here
+				 * (longer worst-case code size).
+				 */
+				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
+				PPC_LI(r_ret, 0);
+				PPC_JMP(exit_addr);
+			}
+			PPC_DIVWU(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
+			PPC_LI32(r_scratch1, K);
+			/* Top 32 bits of 64bit result -> A */
+			PPC_MULHWU(r_A, r_A, r_scratch1);
+			break;
+		case BPF_S_ALU_AND_X:
+			ctx->seen |= SEEN_XREG;
+			PPC_AND(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_AND_K:
+			if (!IMM_H(K))
+				PPC_ANDI(r_A, r_A, K);
+			else {
+				PPC_LI32(r_scratch1, K);
+				PPC_AND(r_A, r_A, r_scratch1);
+			}
+			break;
+		case BPF_S_ALU_OR_X:
+			ctx->seen |= SEEN_XREG;
+			PPC_OR(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_OR_K:
+			if (IMM_L(K))
+				PPC_ORI(r_A, r_A, IMM_L(K));
+			if (K >= 65536)
+				PPC_ORIS(r_A, r_A, IMM_H(K));
+			break;
+		case BPF_S_ALU_LSH_X: /* A <<= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_SLW(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_LSH_K:
+			if (K == 0)
+				break;
+			else
+				PPC_SLWI(r_A, r_A, K);
+			break;
+		case BPF_S_ALU_RSH_X: /* A >>= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_SRW(r_A, r_A, r_X);
+			break;
+		case BPF_S_ALU_RSH_K: /* A >>= K; */
+			if (K == 0)
+				break;
+			else
+				PPC_SRWI(r_A, r_A, K);
+			break;
+		case BPF_S_ALU_NEG:
+			PPC_NEG(r_A, r_A);
+			break;
+		case BPF_S_RET_K:
+			PPC_LI32(r_ret, K);
+			if (!K) {
+				if (ctx->pc_ret0 == -1)
+					ctx->pc_ret0 = i;
+			}
+			/*
+			 * If this isn't the very last instruction, branch to
+			 * the epilogue if we've stuff to clean up.  Otherwise,
+			 * if there's nothing to tidy, just return.  If we /are/
+			 * the last instruction, we're about to fall through to
+			 * the epilogue to return.
+			 */
+			if (i != flen - 1) {
+				/*
+				 * Note: 'seen' is properly valid only on pass
+				 * #2.	Both parts of this conditional are the
+				 * same instruction size though, meaning the
+				 * first pass will still correctly determine the
+				 * code size/addresses.
+				 */
+				if (ctx->seen)
+					PPC_JMP(exit_addr);
+				else
+					PPC_BLR();
+			}
+			break;
+		case BPF_S_RET_A:
+			PPC_MR(r_ret, r_A);
+			if (i != flen - 1) {
+				if (ctx->seen)
+					PPC_JMP(exit_addr);
+				else
+					PPC_BLR();
+			}
+			break;
+		case BPF_S_MISC_TAX: /* X = A */
+			ctx->seen |= SEEN_XREG;
+			PPC_MR(r_X, r_A);
+			break;
+		case BPF_S_MISC_TXA: /* A = X */
+			ctx->seen |= SEEN_XREG;
+			PPC_MR(r_A, r_X);
+			break;
+
+			/*** Constant loads/M[] access ***/
+		case BPF_S_LD_IMM: /* A = K */
+			PPC_LI32(r_A, K);
+			break;
+		case BPF_S_LDX_IMM: /* X = K */
+			ctx->seen |= SEEN_XREG;
+			PPC_LI32(r_X, K);
+			break;
+		case BPF_S_LD_MEM: /* A = mem[K] */
+			PPC_MR(r_A, r_M + (K & 0xf));
+			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
+			break;
+		case BPF_S_LDX_MEM: /* X = mem[K] */
+			PPC_MR(r_X, r_M + (K & 0xf));
+			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
+			break;
+		case BPF_S_ST: /* mem[K] = A */
+			PPC_MR(r_M + (K & 0xf), r_A);
+			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
+			break;
+		case BPF_S_STX: /* mem[K] = X */
+			PPC_MR(r_M + (K & 0xf), r_X);
+			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
+			break;
+		case BPF_S_LD_W_LEN: /*	A = skb->len; */
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
+			break;
+		case BPF_S_LDX_W_LEN: /* X = skb->len; */
+			ctx->seen |= SEEN_XREG;
+			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
+			break;
+
+			/*** Ancillary info loads ***/
+
+			/* None of the BPF_S_ANC* codes appear to be passed by
+			 * sk_chk_filter().  The interpreter and the x86 BPF
+			 * compiler implement them so we do too -- they may be
+			 * planted in future.
+			 */
+		case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+						  protocol) != 2);
+			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+							  protocol));
+			/* ntohs is a NOP with BE loads. */
+			break;
+		case BPF_S_ANC_IFINDEX:
+			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
+								dev));
+			PPC_CMPDI(r_scratch1, 0);
+			if (ctx->pc_ret0 != -1) {
+				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
+			} else {
+				/* Exit, returning 0; first pass hits here. */
+				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
+				PPC_LI(r_ret, 0);
+				PPC_JMP(exit_addr);
+			}
+			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+						  ifindex) != 4);
+			PPC_LWZ_OFFS(r_A, r_scratch1,
+				     offsetof(struct net_device, ifindex));
+			break;
+		case BPF_S_ANC_MARK:
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+							  mark));
+			break;
+		case BPF_S_ANC_RXHASH:
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
+			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+							  rxhash));
+			break;
+		case BPF_S_ANC_QUEUE:
+			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+						  queue_mapping) != 2);
+			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+							  queue_mapping));
+			break;
+		case BPF_S_ANC_CPU:
+#ifdef CONFIG_SMP
+			/*
+			 * PACA ptr is r13:
+			 * raw_smp_processor_id() = local_paca->paca_index
+			 */
+			PPC_LHZ_OFFS(r_A, 13,
+				     offsetof(struct paca_struct, paca_index));
+#else
+			PPC_LI(r_A, 0);
+#endif
+			break;
+
+			/*** Absolute loads from packet header/data ***/
+		case BPF_S_LD_W_ABS:
+			func = sk_load_word;
+			goto common_load;
+		case BPF_S_LD_H_ABS:
+			func = sk_load_half;
+			goto common_load;
+		case BPF_S_LD_B_ABS:
+			func = sk_load_byte;
+		common_load:
+			/*
+			 * Load from [K].  Reference with the (negative)
+			 * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
+			 */
+			ctx->seen |= SEEN_DATAREF;
+			if ((int)K < 0)
+				return -ENOTSUPP;
+			PPC_LI64(r_scratch1, func);
+			PPC_MTLR(r_scratch1);
+			PPC_LI32(r_addr, K);
+			PPC_BLRL();
+			/*
+			 * Helper returns 'lt' condition on error, and an
+			 * appropriate return value in r3
+			 */
+			PPC_BCC(COND_LT, exit_addr);
+			break;
+
+			/*** Indirect loads from packet header/data ***/
+		case BPF_S_LD_W_IND:
+			func = sk_load_word;
+			goto common_load_ind;
+		case BPF_S_LD_H_IND:
+			func = sk_load_half;
+			goto common_load_ind;
+		case BPF_S_LD_B_IND:
+			func = sk_load_byte;
+		common_load_ind:
+			/*
+			 * Load from [X + K].  Negative offsets are tested for
+			 * in the helper functions, and result in a 'ret 0'.
+			 */
+			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
+			PPC_LI64(r_scratch1, func);
+			PPC_MTLR(r_scratch1);
+			PPC_ADDI(r_addr, r_X, IMM_L(K));
+			if (K >= 32768)
+				PPC_ADDIS(r_addr, r_addr, IMM_HA(K));
+			PPC_BLRL();
+			/* If error, cr0.LT set */
+			PPC_BCC(COND_LT, exit_addr);
+			break;
+
+		case BPF_S_LDX_B_MSH:
+			/*
+			 * x86 version drops packet (RET 0) when K<0, whereas
+			 * interpreter does allow K<0 (__load_pointer, special
+			 * ancillary data).
+			 */
+			ctx->seen |= SEEN_XREG;
+			func = sk_load_byte_msh;
+			goto common_load;
+			break;
+
+			/*** Jump and branches ***/
+		case BPF_S_JMP_JA:
+			PPC_JMP(addrs[i + K]);
+			break;
+
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+			true_cond = COND_GT;
+			goto cond_branch;
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+			true_cond = COND_GE;
+			goto cond_branch;
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+			true_cond = COND_EQ;
+			goto cond_branch;
+		case BPF_S_JMP_JSET_K:
+		case BPF_S_JMP_JSET_X:
+			true_cond = COND_NE;
+			/* Fall through */
+		cond_branch:
+			/* same targets, can avoid doing the test :) */
+			if (filter[i].jt == filter[i].jf) {
+				if (filter[i].jt > 0)
+					PPC_JMP(addrs[i + 1 + filter[i].jt]);
+				break;
+			}
+
+			switch (filter[i].code) {
+			case BPF_S_JMP_JGT_X:
+			case BPF_S_JMP_JGE_X:
+			case BPF_S_JMP_JEQ_X:
+				ctx->seen |= SEEN_XREG;
+				PPC_CMPLW(r_A, r_X);
+				break;
+			case BPF_S_JMP_JSET_X:
+				ctx->seen |= SEEN_XREG;
+				PPC_AND_DOT(r_scratch1, r_A, r_X);
+				break;
+			case BPF_S_JMP_JEQ_K:
+			case BPF_S_JMP_JGT_K:
+			case BPF_S_JMP_JGE_K:
+				if (K < 32768)
+					PPC_CMPLWI(r_A, K);
+				else {
+					PPC_LI32(r_scratch1, K);
+					PPC_CMPLW(r_A, r_scratch1);
+				}
+				break;
+			case BPF_S_JMP_JSET_K:
+				if (K < 32768)
+					/* PPC_ANDI is /only/ dot-form */
+					PPC_ANDI(r_scratch1, r_A, K);
+				else {
+					PPC_LI32(r_scratch1, K);
+					PPC_AND_DOT(r_scratch1, r_A,
+						    r_scratch1);
+				}
+				break;
+			}
+			/* Sometimes branches are constructed "backward", with
+			 * the false path being the branch and true path being
+			 * a fallthrough to the next instruction.
+			 */
+			if (filter[i].jt == 0)
+				/* Swap the sense of the branch */
+				PPC_BCC(true_cond ^ COND_CMP_TRUE,
+					addrs[i + 1 + filter[i].jf]);
+			else {
+				PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
+				if (filter[i].jf != 0)
+					PPC_JMP(addrs[i + 1 + filter[i].jf]);
+			}
+			break;
+		default:
+			/* The filter contains something cruel & unusual.
+			 * We don't handle it, but also there shouldn't be
+			 * anything missing from our list.
+			 */
+			pr_err("BPF filter opcode %04x (@%d) unsupported\n",
+			       filter[i].code, i);
+			return -ENOTSUPP;
+		}
+
+	}
+	/* Set end-of-body-code address for exit. */
+	addrs[i] = ctx->idx * 4;
+
+	return 0;
+}
+
+void bpf_jit_compile(struct sk_filter *fp)
+{
+	unsigned int proglen;
+	unsigned int alloclen;
+	u32 *image = NULL;
+	u32 *code_base;
+	unsigned int *addrs;
+	struct codegen_context cgctx;
+	int pass;
+	int flen = fp->len;
+
+	if (!bpf_jit_enable)
+		return;
+
+	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
+	if (addrs == NULL)
+		return;
+
+	/*
+	 * There are multiple assembly passes as the generated code will change
+	 * size as it settles down, figuring out the max branch offsets/exit
+	 * paths required.
+	 *
+	 * The range of standard conditional branches is +/- 32Kbytes.	Since
+	 * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
+	 * finish with 8 bytes/instruction.  Not feasible, so long jumps are
+	 * used, distinct from short branches.
+	 *
+	 * Current:
+	 *
+	 * For now, both branch types assemble to 2 words (short branches padded
+	 * with a NOP); this is inefficient, but assembly will always complete
+	 * after exactly 3 passes:
+	 *
+	 * First pass: No code buffer; Program is "faux-generated" -- no code
+	 * emitted but maximum size of output determined (and addrs[] filled
+	 * in).	 Also, we note whether we use M[], whether we use skb data, etc.
+	 * All generation choices assumed to be 'worst-case', e.g. branches all
+	 * far (2 instructions), return path code reduction not available, etc.
+	 *
+	 * Second pass: Code buffer allocated with size determined previously.
+	 * Prologue generated to support features we have seen used.  Exit paths
+	 * determined and addrs[] is filled in again, as code may be slightly
+	 * smaller as a result.
+	 *
+	 * Third pass: Code generated 'for real', and branch destinations
+	 * determined from now-accurate addrs[] map.
+	 *
+	 * Ideal:
+	 *
+	 * If we optimise this, near branches will be shorter.	On the
+	 * first assembly pass, we should err on the side of caution and
+	 * generate the biggest code.  On subsequent passes, branches will be
+	 * generated short or long and code size will reduce.  With smaller
+	 * code, more branches may fall into the short category, and code will
+	 * reduce more.
+	 *
+	 * Finally, if we see one pass generate code the same size as the
+	 * previous pass we have converged and should now generate code for
+	 * real.  Allocating at the end will also save the memory that would
+	 * otherwise be wasted by the (small) current code shrinkage.
+	 * Preferably, we should do a small number of passes (e.g. 5) and if we
+	 * haven't converged by then, get impatient and force code to generate
+	 * as-is, even if the odd branch would be left long.  The chances of a
+	 * long jump are tiny with all but the most enormous of BPF filter
+	 * inputs, so we should usually converge on the third pass.
+	 */
+
+	cgctx.idx = 0;
+	cgctx.seen = 0;
+	cgctx.pc_ret0 = -1;
+	/* Scouting faux-generate pass 0 */
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
+		/* We hit something illegal or unsupported. */
+		goto out;
+
+	/*
+	 * Pretend to build prologue, given the features we've seen.  This will
+	 * update ctgtx.idx as it pretends to output instructions, then we can
+	 * calculate total size from idx.
+	 */
+	bpf_jit_build_prologue(fp, 0, &cgctx);
+	bpf_jit_build_epilogue(0, &cgctx);
+
+	proglen = cgctx.idx * 4;
+	alloclen = proglen + FUNCTION_DESCR_SIZE;
+	image = module_alloc(max_t(unsigned int, alloclen,
+				   sizeof(struct work_struct)));
+	if (!image)
+		goto out;
+
+	code_base = image + (FUNCTION_DESCR_SIZE/4);
+
+	/* Code generation passes 1-2 */
+	for (pass = 1; pass < 3; pass++) {
+		/* Now build the prologue, body code & epilogue for real. */
+		cgctx.idx = 0;
+		bpf_jit_build_prologue(fp, code_base, &cgctx);
+		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+		bpf_jit_build_epilogue(code_base, &cgctx);
+
+		if (bpf_jit_enable > 1)
+			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
+				proglen - (cgctx.idx * 4), cgctx.seen);
+	}
+
+	if (bpf_jit_enable > 1)
+		pr_info("flen=%d proglen=%u pass=%d image=%p\n",
+		       flen, proglen, pass, image);
+
+	if (image) {
+		if (bpf_jit_enable > 1)
+			print_hex_dump(KERN_ERR, "JIT code: ",
+				       DUMP_PREFIX_ADDRESS,
+				       16, 1, code_base,
+				       proglen, false);
+
+		bpf_flush_icache(code_base, code_base + (proglen/4));
+		/* Function descriptor nastiness: Address + TOC */
+		((u64 *)image)[0] = (u64)code_base;
+		((u64 *)image)[1] = local_paca->kernel_toc;
+		fp->bpf_func = (void *)image;
+	}
+out:
+	kfree(addrs);
+	return;
+}
+
+static void jit_free_defer(struct work_struct *arg)
+{
+	module_free(NULL, arg);
+}
+
+/* run from softirq, we must use a work_struct to call
+ * module_free() from process context
+ */
+void bpf_jit_free(struct sk_filter *fp)
+{
+	if (fp->bpf_func != sk_run_filter) {
+		struct work_struct *work = (struct work_struct *)fp->bpf_func;
+
+		INIT_WORK(work, jit_free_defer);
+		schedule_work(work);
+	}
+}

^ permalink raw reply related

* [RFC PATCH 0/1] BPF JIT for PPC64
From: Matt Evans @ 2011-06-24  6:02 UTC (permalink / raw)
  To: netdev, linuxppc-dev

Hi,

Inspired by Eric Dumazet's x86-64 compiler for Berkeley Packet Filter programs,
I've written a BPF compiler for 64-bit PowerPC.  Although it hasn't finished its
strenuous testing regime, I'll have intermittent net access for a couple of
weeks so thought I'd post it for feedback now and submit a 'proper' version when
I'm back.

It's a fairly simple code generator, following a similar structure to the x86
version.  The filter programs are an array of opcode/constant/branch destination
structs, and can perform arithmetic/logical/comparison operations on two virtual
registers A and X, loads from packet headers/data and accesses to local
variables, M[].  Branching is also supported, but only forwards and only within
the extent of the program.

I would probably describe this as more of a "static template binary translator"
than a "JIT" but have kept naming consistent :)

Features include:

- Filter code is generated as an ABI-compliant function, stackframe &
  prologue/epilogue if necessary.

- Simple filters (e.g. RET nn) need no stackframe or save/restore code so
  generate into only an li/blr.

- Local variables, M[], live in registers

- I believe this supports all BPF opcodes, although "complicated" loads from
  negative packet offsets (e.g. SKF_LL_OFF) are not yet supported.

Caveats include:  :)

- Packet data loads call out to simple helper functions (bpf_jit.S) which
  themselves may fall back to a trampoline to skb_copy_bits.  I haven't decided
  whether (as per comments there) it would be better to generate the simple
  loads inline and only call out in the slow case.

- Branches currently generate to "bcc 1f; b <far dest>; 1:" or
  "bcc <near dest> ; nop" so either case is the same size.  Multiple passes of
  assembly are used (the first gets an idea of how big everything is and what
  features are required), the next generates everything at accurate size, the
  third generates everything with accurate branch destination addresses); I
  intend not to nop-pad the short branch case but changing code size may
  result in more passes and a 'settling-down period'.  Kept simple for now.

- Anyone running PPC64 little-endian is doing something both interesting and
  unsupported for this work :-)  (There are some trivial endian assumptions.)

Tested in-situ (tcpdump with varying complexity filters) and with a random BPF
generator; I haven't verified loads from the fall back skb_copy_bits path.  Bug
reports/testing would be very welcome.

Cheers,

Matt

^ permalink raw reply

* Re: [PATCH] net/usb: kalmia: Various fixes for better support of non-x86 architectures.
From: Oliver Neukum @ 2011-06-24  5:42 UTC (permalink / raw)
  To: Sergei Shtylyov
  Cc: Alan Stern, Marius B. Kotsbak, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	netdev-u79uwXL29TY76Z2rM5mHXA, linux-usb-u79uwXL29TY76Z2rM5mHXA,
	Marius B. Kotsbak
In-Reply-To: <4E0353D0.1060604-hkdhdckH98+B+jHODAdFcQ@public.gmane.org>

Am Donnerstag, 23. Juni 2011, 16:55:12 schrieb Sergei Shtylyov:
> >>     Actually 'const' alone should've been enough for the variable to be placed 
> >> in the data section ISO stack.
> 
> > I don't understand what "ISO stack" means here.
> 
>     ISO == instead of.

That doesn't matter. If you are sufficiently unlucky it will land on
a page boundary. You must not do DMA on the data section either.

	Regards
		Oliver

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-2.6] cxgb3: skb_record_rx_queue now records the queue index relative to the net_device.
From: Eric Dumazet @ 2011-06-24  4:53 UTC (permalink / raw)
  To: John Hernandez; +Cc: davem, netdev, divy, Shawn Bohrer
In-Reply-To: <20110624003912.14349.62988.stgit@speedy5.asicdesigners.com>

Le jeudi 23 juin 2011 à 17:39 -0700, John Hernandez a écrit :
> From: John (Jay) Hernandez <jay@chelsio.com>
> 
> Fixed call to skb_record_rx_queue where we were passing the queue index
> relative to the adapter when it should have been relative to the net_device.
> 
> Signed-off-by: John (Jay) Hernandez <jay@chelsio.com>
> Signed-off-by: Divy Le Ray <divy@chelsio.com>
> 

Hmm, wasnt this bug reported by Shawn Bohrer ?

Reported-by: Shawn Bohrer <sbohrer@rgmadvisors.com>

Proper credits please ?

^ permalink raw reply

* Bug#629604: linux-image-2.6.38-2-686: sky2 eth0 rx length errors (~5/second)
From: Freedom Tea @ 2011-06-24  4:30 UTC (permalink / raw)
  To: 629604; +Cc: Stephen Hemminger, netdev
In-Reply-To: <1307540756.22348.498.camel@localhost>

[-- Attachment #1: Type: text/plain, Size: 617 bytes --]

Problem persists in latest wheezy kernel, 2.6.39-2-686-pae

On Wed, Jun 8, 2011 at 11:45 PM, Ben Hutchings <ben@decadent.org.uk> wrote:

> -------- Forwarded Message --------
> From: Freedom Tea <freedomtea@gmail.com>
> Reply-to: Freedom Tea <freedomtea@gmail.com>, 629604@bugs.debian.org
> To: 629604@bugs.debian.org
> Subject: Bug#629604: linux-image-2.6.38-2-686: sky2 eth0 rx length errors
> (~5/second)
> Date: Wed, 8 Jun 2011 13:38:52 +1000
>
> Sorry, yes I was on 2.6.32.  The 5 came from "2.6.32-5-686" (seen many
> times in grub).
>
> kernel: [    1.084097] sky2 0000:02:00.0: Yukon-2 EC chip revision 2
>
>

[-- Attachment #2: Type: text/html, Size: 1096 bytes --]

^ permalink raw reply

* Re: linux-next: Tree for June 23 (net)
From: David Miller @ 2011-06-24  4:21 UTC (permalink / raw)
  To: randy.dunlap; +Cc: sfr, netdev, linux-next, linux-kernel
In-Reply-To: <20110623091429.2d51b0f0.randy.dunlap@oracle.com>

From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 23 Jun 2011 09:14:29 -0700

> On Thu, 23 Jun 2011 15:54:31 +1000 Stephen Rothwell wrote:
> 
>> Hi all,
>> 
>> Changes since 20110622:
>> 
>> The powerpc allyesconfig (and probably others) is still broken because we
>> now build the staging drivers and because of a commit in the net tree.
>> The breakage in Linus' tree is fixed by one of Andrew's patches above.
>> 
>> The net tree gained 2 build failures that I have left (see above).
> 
> 
> When CONFIG_INET is not enabled:
> 
> net/core/dev.c:2535: error: implicit declaration of function 'ip_is_fragment'

I'll fix this like so:

diff --git a/include/net/ip.h b/include/net/ip.h
index d603cd3..9fa9416 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -236,6 +236,11 @@ extern void ipfrag_init(void);
 
 extern void ip_static_sysctl_init(void);
 
+static inline bool ip_is_fragment(const struct iphdr *iph)
+{
+	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
+}
+
 #ifdef CONFIG_INET
 #include <net/dst.h>
 
@@ -250,11 +255,6 @@ int ip_decrease_ttl(struct iphdr *iph)
 	return --iph->ttl;
 }
 
-static inline bool ip_is_fragment(const struct iphdr *iph)
-{
-	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
-}
-
 static inline
 int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
 {

^ permalink raw reply related

* Re: [PATCH 1/3] serial/imx: add device tree support
From: Grant Likely @ 2011-06-24  4:05 UTC (permalink / raw)
  To: Shawn Guo
  Cc: patches-QSEj5FYQhm4dnm+yROfE0A, netdev-u79uwXL29TY76Z2rM5mHXA,
	devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ, Jason Liu,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Jeremy Kerr, Sascha Hauer,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
In-Reply-To: <20110624034859.GB19188-+NayF8gZjK2ctlrPMvKcciBecyulp+rMXqFh9Ls21Oc@public.gmane.org>

On Thu, Jun 23, 2011 at 9:49 PM, Shawn Guo <shawn.guo-KZfg59tc24xl57MIdRCFDg@public.gmane.org> wrote:
> On Thu, Jun 23, 2011 at 05:11:54PM -0600, Grant Likely wrote:
>> > +               int devid = get_alias_name_id(pp->name, devname);
>> > +
>> > +               /* We do not want to proceed this sentinel one */
>> > +               if (!strcmp(pp->name, "name") && !strcmp(pp->value, "aliases"))
>> > +                       break;
>>
>> Skipping the 'name' property is good, but I don't think you need to
>> check the value.  You should also skip the "phandle" property.
>>
> Ok. I have not seen "phandle" property in aliases node though.  Can
> you give me an example, so that I can make one up for testing?

It's a standard property that shows up automatically if any node
happens to have a phandle reference to another node.  The name will be
either "phandle" or "linux,phandle".  Search for "linux,phandle" in
drivers/of/fdt.c to see how that property is processed.

g.

^ permalink raw reply

* Re: [net-next 07/12] ixgbe: add support for modifying UDP RSS flow hash options
From: Jeff Kirsher @ 2011-06-24  3:57 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: davem, Alexander Duyck, netdev, gospo
In-Reply-To: <1308795900.3093.713.camel@localhost>

On Wed, Jun 22, 2011 at 19:25, Ben Hutchings <bhutchings@solarflare.com> wrote:
> On Wed, 2011-06-22 at 18:44 -0700, Jeff Kirsher wrote:
>> From: Alexander Duyck <alexander.h.duyck@intel.com>
>>
>> This patch adds the ability to add or remove the UDP source and destination
>> ports from the flow hash generated for RSS.  Currently the UDP flow hash is
>> always disabled.  By adding support for enabling the UDP flow hash we are
>> now providing the option of generating an RSS hash based on the UDP source
>> and destination port numbers.
> [...]
>> --- a/drivers/net/ixgbe/ixgbe_ethtool.c
>> +++ b/drivers/net/ixgbe/ixgbe_ethtool.c
> [...]
>> @@ -2681,6 +2726,110 @@ static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
>>       return err;
>>  }
>>
>> +#define UDP_RSS_FLAGS (IXGBE_FLAG2_RSS_FIELD_IPV4_UDP | \
>> +                    IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
>> +static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter,
>> +                               struct ethtool_rxnfc *nfc)
>> +{
>> +     u32 flags2 = adapter->flags2;
>> +
>> +     /*
>> +      * RSS does not support anything other than hashing
>> +      * to queues on src and dst IPs and ports
>> +      */
>> +     if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
>> +                       RXH_L4_B_0_1 | RXH_L4_B_2_3))
>> +             return -EINVAL;
>> +
>> +     switch (nfc->flow_type) {
>> +     case TCP_V4_FLOW:
>> +     case TCP_V6_FLOW:
>> +             if (!(nfc->data & RXH_IP_SRC) ||
>> +                 !(nfc->data & RXH_IP_DST) ||
>> +                 !(nfc->data & RXH_L4_B_0_1) ||
>> +                 !(nfc->data & RXH_L4_B_2_3))
>> +                     return -EINVAL;
>> +             break;
>
> This can be written as a simple equality test.
>
>> +     case UDP_V4_FLOW:
>> +             if (!(nfc->data & RXH_IP_SRC) ||
>> +                 !(nfc->data & RXH_IP_DST))
>> +                     return -EINVAL;
>> +             switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
>
> You could just as well include all flags in the switch.
>
> [...]
>> +     case AH_ESP_V4_FLOW:
>> +     case AH_V4_FLOW:
>> +     case ESP_V4_FLOW:
>> +     case SCTP_V4_FLOW:
>> +     case AH_ESP_V6_FLOW:
>> +     case AH_V6_FLOW:
>> +     case ESP_V6_FLOW:
>> +     case SCTP_V6_FLOW:
>> +             if (!(nfc->data & RXH_IP_SRC) ||
>> +                 !(nfc->data & RXH_IP_DST))
>> +                     return -EINVAL;
>> +               break;
> [...]
>
> This should be written as an equality test, otherwise you will ignore a
> request to include port numbers in the hash for these flow classes.
>
> Ben.
>
>

Alex is on vacation currently, so I will just pull this patch for now
to give Alex time to respond when he returns next week.

-- 
Cheers,
Jeff

^ permalink raw reply

* Re: [RFC] Moving files around in drivers/net
From: Jeff Kirsher @ 2011-06-24  3:56 UTC (permalink / raw)
  To: Giuseppe CAVALLARO
  Cc: Jon Mason, Joe Perches, netdev, David Miller, Paul Gortmaker,
	Jan Engelhardt, Andrew Gallatin
In-Reply-To: <4E033FE9.9060806@st.com>

[-- Attachment #1: Type: text/plain, Size: 979 bytes --]

On Thu, 2011-06-23 at 06:30 -0700, Giuseppe CAVALLARO wrote:
> On 6/16/2011 5:03 PM, Jon Mason wrote:
> >> stmmac may not be a good name.  Maybe stmicro.
> 
> Hello and sorry for the delay of this reply.
> 
> concerning the stmmac, in the past, I had thought to rename it as
> synmac
> because the driver, although was initially designed to work on ST
> platforms, today it runs on other platforms.
> The driver indeed is generic enough and is for MAC 10/100/1000 core by
> Synopsys.
> This is just an idea.
> 
> Moreover, sorry if I'm missing some details of this thread, I can do
> rework if you agree, so let me know how proceed.
> IIUC new patches should be built against:
> git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-organize
> 
> Regards
> Peppe 

I can make that change, I am almost finished re-working the patches with
the suggested changes so far, so I will try to get it into the next
update I post in the next couple of days.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply

* Re: [PATCH 1/3] serial/imx: add device tree support
From: Shawn Guo @ 2011-06-24  3:49 UTC (permalink / raw)
  To: Grant Likely
  Cc: patches-QSEj5FYQhm4dnm+yROfE0A, netdev-u79uwXL29TY76Z2rM5mHXA,
	devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ, Jason Liu,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Jeremy Kerr, Sascha Hauer,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
In-Reply-To: <BANLkTi=8Mks6T85WnEyihmB21U1j7reHrQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

On Thu, Jun 23, 2011 at 05:11:54PM -0600, Grant Likely wrote:
[...]
> > diff --git a/drivers/of/base.c b/drivers/of/base.c
> > index 632ebae..90349a2 100644
> > --- a/drivers/of/base.c
> > +++ b/drivers/of/base.c
> > @@ -17,12 +17,27 @@
> >  *      as published by the Free Software Foundation; either version
> >  *      2 of the License, or (at your option) any later version.
> >  */
> > +#include <linux/ctype.h>
> >  #include <linux/module.h>
> >  #include <linux/of.h>
> >  #include <linux/spinlock.h>
> >  #include <linux/slab.h>
> >  #include <linux/proc_fs.h>
> >
> > +struct alias_devname {
> > +       char devname[32];
> > +       struct list_head link;
> > +       struct list_head head;
> > +};
> > +
> > +struct alias_devid {
> > +       int devid;
> > +       struct device_node *node;
> > +       struct list_head link;
> > +};
> 
> Some LinuxDoc documentation on the meaning of these structures would
> be helpful.  I'm not convinced that a two level lookup table is really
> necessary.  A flat table containing alias, device_node pointer, and
> possibly decoded devname and id is probably sufficient to get started.
>  Also, I think it will still be useful to store a pointer to the
> actual alias name in the alias_devid record.
> 
I thought two level lookup with driver probe function passing in
stem name will get the matching faster.  But I agree with you that
a flat table may be sufficient to get started, since practically
the alias number will not be that huge.

> > +
> > +static LIST_HEAD(aliases_lookup);
> > +
> >  struct device_node *allnodes;
> >  struct device_node *of_chosen;
> >
> > @@ -922,3 +937,170 @@ out_unlock:
> >  }
> >  #endif /* defined(CONFIG_OF_DYNAMIC) */
> >
> > +/*
> > + * get_alias_dev_name_id - Get device name and id from alias name
> > + *
> > + * an: The alias name passed in
> > + * dn: The pointer used to return device name
> 
> There is actually little point in decoding an alias to the device
> name.  It is more useful to decode alias to the device_node pointer
> which can be found with of_find_node_by_path().  I'd like to have a
> lookup table generated which contains {const char *alias_name,
> device_node *np} pairs.  It would also be useful for that table to
> decode the 'id' from the end of the alias name when available.  Then,
> given an alias stem and id (like imxuart and 2) the code could match
> it to alias imxuart0 and look up the device_node associated with (I
> could see this used by console setup code).  Alternately, driver probe

Yes, this is definitely one way to match.  But it's not as handy as
the alternate one below, in terms of migrating the current platform
drivers that use pdev->id everywhere to dt.  For the imx console
setup example, we can get the device_node by matching alias stem and
id, but we have to address the port we need with another two
contains_of(), device_node -> dev -> port.

> code could use its device_node pointer to lookup its alias, and if no
> alias exists, then use the table to find an unused id (and possibly
> even add an entry to the table when it allocates an id).
> 
And this is easier for the current platform drivers to migrate to
dt, so I would keep purchasing this one.

[...]
> > +               int devid = get_alias_name_id(pp->name, devname);
> > +
> > +               /* We do not want to proceed this sentinel one */
> > +               if (!strcmp(pp->name, "name") && !strcmp(pp->value, "aliases"))
> > +                       break;
> 
> Skipping the 'name' property is good, but I don't think you need to
> check the value.  You should also skip the "phandle" property.
> 
Ok. I have not seen "phandle" property in aliases node though.  Can
you give me an example, so that I can make one up for testing?

-- 
Regards,
Shawn

^ permalink raw reply

* [RFC][NET-NEXT PATCH 2/2] net: Add GSO to vlan_features initialization
From: Shan Wei @ 2011-06-24  3:38 UTC (permalink / raw)
  To: 单卫, David Miller, netdev, Eric Dumazet

This patch is not a bug fix.
Just add GSO to vlan_features initialization, and update comments.


Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
---
 net/core/dev.c |    9 +++++----
 1 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 6b6ef14..becc1e5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5478,11 +5478,12 @@ int register_netdevice(struct net_device *dev)
 		dev->features |= NETIF_F_NOCACHE_COPY;
 	}
 
-	/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
-	 * vlan_dev_init() will do the dev->features check, so these features
-	 * are enabled only if supported by underlying device.
+	/* Enable GSO, GRO and NETIF_F_HIGHDMA for vlans by default,
+	 * vlan_dev_fix_features() will do the features check,
+	 * so NETIF_F_HIGHDMA feature is enabled only if supported
+	 * by underlying device.
 	 */
-	dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+	dev->vlan_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_HIGHDMA);
 
 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
 	ret = notifier_to_errno(ret);
-- 
1.7.4.1

^ permalink raw reply related

* [RFC][PATCH 1/2] net: vlan: enable GSO by default
From: Shan Wei @ 2011-06-24  3:38 UTC (permalink / raw)
  To: 单卫, Patrick McHardy, David Miller, netdev

Currently, GSO for vlan device is off, and can't be set to on.
Although underlying device don't support TSO, we still
should use software segments for vlan device.

In vlan_dev_fix_features(), final features is decided by
features of real device and vlan_features of real device.

real_dev->vlan_features is initialized in register_netdevice()
only with NETIF_F_GRO, not NETIF_F_GSO.

So, now GRO is ok, but GSO is broken by default.


Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
---
 net/8021q/vlan_dev.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 1c9aa8c..d8f45ba 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -588,9 +588,14 @@ static void vlan_dev_uninit(struct net_device *dev)
 static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
 {
 	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	u32 old_features = features;
 
 	features &= real_dev->features;
 	features &= real_dev->vlan_features;
+
+	if (old_features & NETIF_F_SOFT_FEATURES)
+		features |= old_features & NETIF_F_SOFT_FEATURES;
+
 	if (dev_ethtool_get_rx_csum(real_dev))
 		features |= NETIF_F_RXCSUM;
 	features |= NETIF_F_LLTX;
-- 
1.7.4.1

^ permalink raw reply related

* [PATCH] net: sh_eth: tidyup compile warning
From: Kuninori Morimoto @ 2011-06-24  2:02 UTC (permalink / raw)
  To: David S. Miller; +Cc: Linux-Net, shimoda

This patch tidyup below warning

${LINUX}/drivers/net/sh_eth.c:1773: warning:
'mdp' may be used uninitialized in this function

Cc: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
---
 drivers/net/sh_eth.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index 8a72a97..96a629f 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -1770,7 +1770,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	int ret, devno = 0;
 	struct resource *res;
 	struct net_device *ndev = NULL;
-	struct sh_eth_private *mdp;
+	struct sh_eth_private *mdp = NULL;
 	struct sh_eth_plat_data *pd;
 
 	/* get base addr */
@@ -1888,7 +1888,7 @@ out_unregister:
 
 out_release:
 	/* net_dev free */
-	if (mdp->tsu_addr)
+	if (mdp && mdp->tsu_addr)
 		iounmap(mdp->tsu_addr);
 	if (ndev)
 		free_netdev(ndev);
-- 
1.7.4.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox