Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next 10/10] igb: Add support for adding offloaded clsflower filters
From: Jeff Kirsher @ 2018-04-25 18:22 UTC (permalink / raw)
  To: davem
  Cc: Vinicius Costa Gomes, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180425182232.28935-1-jeffrey.t.kirsher@intel.com>

From: Vinicius Costa Gomes <vinicius.gomes@intel.com>

This allows filters added by tc-flower and specifying MAC addresses,
Ethernet types, and the VLAN priority field, to be offloaded to the
controller.

This reuses most of the infrastructure used by ethtool, but clsflower
filters are kept in a separated list, so they are invisible to
ethtool.

To setup clsflower offloading:

$ tc qdisc replace dev eth0 handle 100: parent root mqprio \
     	   	   num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
		   queues 1@0 1@1 2@2 hw 0
(clsflower offloading depends on the netword driver to be configured
with multiple traffic classes, we use mqprio's 'num_tc' parameter to
set it to 3)

$ tc qdisc add dev eth0 ingress

Examples of filters:

$ tc filter add dev eth0 parent ffff: flower \
     	    dst_mac aa:aa:aa:aa:aa:aa \
	    hw_tc 2 skip_sw
(just a simple filter filtering for the destination MAC address and
steering that traffic to queue 2)

$ tc filter add dev enp2s0 parent ffff: proto 0x22f0 flower \
     	    src_mac cc:cc:cc:cc:cc:cc \
	    hw_tc 1 skip_sw
(as the i210 doesn't support steering traffic based on the source
address alone, we need to use another steering traffic, in this case
we are using the ethernet type (0x22f0) to steer traffic to queue 1)

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h      |   2 +
 drivers/net/ethernet/intel/igb/igb_main.c | 188 +++++++++++++++++++++++++++++-
 2 files changed, 188 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 0eac1df499f8..990a7fb32e4e 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -465,6 +465,7 @@ struct igb_nfc_input {
 struct igb_nfc_filter {
 	struct hlist_node nfc_node;
 	struct igb_nfc_input filter;
+	unsigned long cookie;
 	u16 etype_reg_index;
 	u16 sw_idx;
 	u16 action;
@@ -604,6 +605,7 @@ struct igb_adapter {
 
 	/* RX network flow classification support */
 	struct hlist_head nfc_filter_list;
+	struct hlist_head cls_flower_list;
 	unsigned int nfc_filter_count;
 	/* lock for RX network flow classification filter */
 	spinlock_t nfc_lock;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 26b0f0efc97e..3ce43207a35f 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2514,16 +2514,197 @@ static int igb_offload_cbs(struct igb_adapter *adapter,
 	return 0;
 }
 
+#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+#define VLAN_PRIO_FULL_MASK (0x07)
+
+static int igb_parse_cls_flower(struct igb_adapter *adapter,
+				struct tc_cls_flower_offload *f,
+				int traffic_class,
+				struct igb_nfc_filter *input)
+{
+	struct netlink_ext_ack *extack = f->common.extack;
+
+	if (f->dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Unsupported key used, only BASIC, CONTROL, ETH_ADDRS and VLAN are supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_dissector_key_eth_addrs *key, *mask;
+
+		key = skb_flow_dissector_target(f->dissector,
+						FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						f->key);
+		mask = skb_flow_dissector_target(f->dissector,
+						 FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						 f->mask);
+
+		if (!is_zero_ether_addr(mask->dst)) {
+			if (!is_broadcast_ether_addr(mask->dst)) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address");
+				return -EINVAL;
+			}
+
+			input->filter.match_flags |=
+				IGB_FILTER_FLAG_DST_MAC_ADDR;
+			ether_addr_copy(input->filter.dst_addr, key->dst);
+		}
+
+		if (!is_zero_ether_addr(mask->src)) {
+			if (!is_broadcast_ether_addr(mask->src)) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address");
+				return -EINVAL;
+			}
+
+			input->filter.match_flags |=
+				IGB_FILTER_FLAG_SRC_MAC_ADDR;
+			ether_addr_copy(input->filter.src_addr, key->src);
+		}
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_dissector_key_basic *key, *mask;
+
+		key = skb_flow_dissector_target(f->dissector,
+						FLOW_DISSECTOR_KEY_BASIC,
+						f->key);
+		mask = skb_flow_dissector_target(f->dissector,
+						 FLOW_DISSECTOR_KEY_BASIC,
+						 f->mask);
+
+		if (mask->n_proto) {
+			if (mask->n_proto != ETHER_TYPE_FULL_MASK) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter");
+				return -EINVAL;
+			}
+
+			input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE;
+			input->filter.etype = key->n_proto;
+		}
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_dissector_key_vlan *key, *mask;
+
+		key = skb_flow_dissector_target(f->dissector,
+						FLOW_DISSECTOR_KEY_VLAN,
+						f->key);
+		mask = skb_flow_dissector_target(f->dissector,
+						 FLOW_DISSECTOR_KEY_VLAN,
+						 f->mask);
+
+		if (mask->vlan_priority) {
+			if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority");
+				return -EINVAL;
+			}
+
+			input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
+			input->filter.vlan_tci = key->vlan_priority;
+		}
+	}
+
+	input->action = traffic_class;
+	input->cookie = f->cookie;
+
+	return 0;
+}
+
 static int igb_configure_clsflower(struct igb_adapter *adapter,
 				   struct tc_cls_flower_offload *cls_flower)
 {
-	return -EOPNOTSUPP;
+	struct netlink_ext_ack *extack = cls_flower->common.extack;
+	struct igb_nfc_filter *filter, *f;
+	int err, tc;
+
+	tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+	if (tc < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid traffic class");
+		return -EINVAL;
+	}
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	err = igb_parse_cls_flower(adapter, cls_flower, tc, filter);
+	if (err < 0)
+		goto err_parse;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(f, &adapter->nfc_filter_list, nfc_node) {
+		if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) {
+			err = -EEXIST;
+			NL_SET_ERR_MSG_MOD(extack,
+					   "This filter is already set in ethtool");
+			goto err_locked;
+		}
+	}
+
+	hlist_for_each_entry(f, &adapter->cls_flower_list, nfc_node) {
+		if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) {
+			err = -EEXIST;
+			NL_SET_ERR_MSG_MOD(extack,
+					   "This filter is already set in cls_flower");
+			goto err_locked;
+		}
+	}
+
+	err = igb_add_filter(adapter, filter);
+	if (err < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Could not add filter to the adapter");
+		goto err_locked;
+	}
+
+	hlist_add_head(&filter->nfc_node, &adapter->cls_flower_list);
+
+	spin_unlock(&adapter->nfc_lock);
+
+	return 0;
+
+err_locked:
+	spin_unlock(&adapter->nfc_lock);
+
+err_parse:
+	kfree(filter);
+
+	return err;
 }
 
 static int igb_delete_clsflower(struct igb_adapter *adapter,
 				struct tc_cls_flower_offload *cls_flower)
 {
-	return -EOPNOTSUPP;
+	struct igb_nfc_filter *filter;
+	int err;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(filter, &adapter->cls_flower_list, nfc_node)
+		if (filter->cookie == cls_flower->cookie)
+			break;
+
+	if (!filter) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	err = igb_erase_filter(adapter, filter);
+	if (err < 0)
+		goto out;
+
+	hlist_del(&filter->nfc_node);
+	kfree(filter);
+
+out:
+	spin_unlock(&adapter->nfc_lock);
+
+	return err;
 }
 
 static int igb_setup_tc_cls_flower(struct igb_adapter *adapter,
@@ -9368,6 +9549,9 @@ static void igb_nfc_filter_exit(struct igb_adapter *adapter)
 	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
 		igb_erase_filter(adapter, rule);
 
+	hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node)
+		igb_erase_filter(adapter, rule);
+
 	spin_unlock(&adapter->nfc_lock);
 }
 
-- 
2.14.3

^ permalink raw reply related

* [net-next 09/10] igb: Add the skeletons for tc-flower offloading
From: Jeff Kirsher @ 2018-04-25 18:22 UTC (permalink / raw)
  To: davem
  Cc: Vinicius Costa Gomes, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180425182232.28935-1-jeffrey.t.kirsher@intel.com>

From: Vinicius Costa Gomes <vinicius.gomes@intel.com>

This adds basic functions needed to implement offloading for filters
created by tc-flower.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 66 +++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 9a5c2dc3cf5a..26b0f0efc97e 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -36,6 +36,7 @@
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 #include <linux/net_tstamp.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
@@ -2513,6 +2514,69 @@ static int igb_offload_cbs(struct igb_adapter *adapter,
 	return 0;
 }
 
+static int igb_configure_clsflower(struct igb_adapter *adapter,
+				   struct tc_cls_flower_offload *cls_flower)
+{
+	return -EOPNOTSUPP;
+}
+
+static int igb_delete_clsflower(struct igb_adapter *adapter,
+				struct tc_cls_flower_offload *cls_flower)
+{
+	return -EOPNOTSUPP;
+}
+
+static int igb_setup_tc_cls_flower(struct igb_adapter *adapter,
+				   struct tc_cls_flower_offload *cls_flower)
+{
+	switch (cls_flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return igb_configure_clsflower(adapter, cls_flower);
+	case TC_CLSFLOWER_DESTROY:
+		return igb_delete_clsflower(adapter, cls_flower);
+	case TC_CLSFLOWER_STATS:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				 void *cb_priv)
+{
+	struct igb_adapter *adapter = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return igb_setup_tc_cls_flower(adapter, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int igb_setup_tc_block(struct igb_adapter *adapter,
+			      struct tc_block_offload *f)
+{
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, igb_setup_tc_block_cb,
+					     adapter, adapter);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb,
+					adapter);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			void *type_data)
 {
@@ -2521,6 +2585,8 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
 	switch (type) {
 	case TC_SETUP_QDISC_CBS:
 		return igb_offload_cbs(adapter, type_data);
+	case TC_SETUP_BLOCK:
+		return igb_setup_tc_block(adapter, type_data);
 
 	default:
 		return -EOPNOTSUPP;
-- 
2.14.3

^ permalink raw reply related

* [net-next 08/10] igb: Add MAC address support for ethtool nftuple filters
From: Jeff Kirsher @ 2018-04-25 18:22 UTC (permalink / raw)
  To: davem
  Cc: Vinicius Costa Gomes, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180425182232.28935-1-jeffrey.t.kirsher@intel.com>

From: Vinicius Costa Gomes <vinicius.gomes@intel.com>

This adds the capability of configuring the queue steering of arriving
packets based on their source and destination MAC addresses.

Source address steering (i.e. driving traffic to a specific queue),
for the i210, does not work, but filtering does (i.e. accepting
traffic based on the source address). So, trying to add a filter
specifying only a source address will be an error.

In practical terms this adds support for the following use cases,
characterized by these examples:

$ ethtool -N eth0 flow-type ether dst aa:aa:aa:aa:aa:aa action 0
(this will direct packets with destination address "aa:aa:aa:aa:aa:aa"
to the RX queue 0)

$ ethtool -N eth0 flow-type ether src 44:44:44:44:44:44 \
  	     	  	    	  proto 0x22f0 action 3
(this will direct packets with source address "44:44:44:44:44:44" and
ethertype 0x22f0 to the RX queue 3)

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_ethtool.c | 43 +++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 31b2960a7869..6697c273ab59 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2495,6 +2495,23 @@ static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter,
 			fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
 			fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
 		}
+		if (rule->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) {
+			ether_addr_copy(fsp->h_u.ether_spec.h_dest,
+					rule->filter.dst_addr);
+			/* As we only support matching by the full
+			 * mask, return the mask to userspace
+			 */
+			eth_broadcast_addr(fsp->m_u.ether_spec.h_dest);
+		}
+		if (rule->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) {
+			ether_addr_copy(fsp->h_u.ether_spec.h_source,
+					rule->filter.src_addr);
+			/* As we only support matching by the full
+			 * mask, return the mask to userspace
+			 */
+			eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
+		}
+
 		return 0;
 	}
 	return -EINVAL;
@@ -2768,8 +2785,16 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter,
 
 int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
 {
+	struct e1000_hw *hw = &adapter->hw;
 	int err = -EINVAL;
 
+	if (hw->mac.type == e1000_i210 &&
+	    !(input->filter.match_flags & ~IGB_FILTER_FLAG_SRC_MAC_ADDR)) {
+		dev_err(&adapter->pdev->dev,
+			"i210 doesn't support flow classification rules specifying only source addresses.\n");
+		return -EOPNOTSUPP;
+	}
+
 	if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) {
 		err = igb_rxnfc_write_etype_filter(adapter, input);
 		if (err)
@@ -2933,10 +2958,6 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter,
 	if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW)
 		return -EINVAL;
 
-	if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK &&
-	    fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK))
-		return -EINVAL;
-
 	input = kzalloc(sizeof(*input), GFP_KERNEL);
 	if (!input)
 		return -ENOMEM;
@@ -2946,6 +2967,20 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter,
 		input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE;
 	}
 
+	/* Only support matching addresses by the full mask */
+	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) {
+		input->filter.match_flags |= IGB_FILTER_FLAG_SRC_MAC_ADDR;
+		ether_addr_copy(input->filter.src_addr,
+				fsp->h_u.ether_spec.h_source);
+	}
+
+	/* Only support matching addresses by the full mask */
+	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) {
+		input->filter.match_flags |= IGB_FILTER_FLAG_DST_MAC_ADDR;
+		ether_addr_copy(input->filter.dst_addr,
+				fsp->h_u.ether_spec.h_dest);
+	}
+
 	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
 		if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
 			err = -EINVAL;
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next 0/6] liquidio: enhanced ethtool --set-channels feature
From: Felix Manlunas @ 2018-04-25 18:23 UTC (permalink / raw)
  To: davem
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	felix.manlunas, intiyaz.basha

From: Intiyaz Basha <intiyaz.basha@cavium.com>

For the ethtool --set-channels feature, the liquidio driver currently 
accepts max combined value as the queue count configured during driver
load time, where max combined count is the total count of input and output
queues. This limitation is applicable only when SR-IOV is enabled, that 
is, when VFs are created for PF. If SR-IOV is not enabled, the driver can
configure max supported (64) queues. 

This series of patches are for enhancing driver to accept 
max supported queues for ethtool --set-channels.

Intiyaz Basha (6):
  liquidio: Moved common function if_cfg_callback to lio_core.c
  liquidio: Moved common function list_delete_head to octeon_network.h
  liquidio: Moved common function delete_glists to lio_core.c
  liquidio: Moved common definition octnic_gather to octeon_network.h
  liquidio: Moved common function setup_glists to lio_core.c
  liquidio: enhanced ethtool --set-channels feature

 .../ethernet/cavium/liquidio/cn23xx_pf_device.c    |   6 +-
 .../ethernet/cavium/liquidio/cn23xx_pf_device.h    |   2 +
 drivers/net/ethernet/cavium/liquidio/lio_core.c    | 158 +++++++++++-
 drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 263 ++++++++++++++++++--
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 270 ++++-----------------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 206 +---------------
 .../net/ethernet/cavium/liquidio/liquidio_common.h |   1 +
 .../net/ethernet/cavium/liquidio/octeon_device.c   |  12 +-
 .../net/ethernet/cavium/liquidio/octeon_device.h   |   2 +-
 .../net/ethernet/cavium/liquidio/octeon_network.h  |  60 ++++-
 10 files changed, 528 insertions(+), 452 deletions(-)

-- 
2.9.0

^ permalink raw reply

* [PATCH net-next 1/6] liquidio: Moved common function if_cfg_callback to lio_core.c
From: Felix Manlunas @ 2018-04-25 18:23 UTC (permalink / raw)
  To: davem
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	felix.manlunas, intiyaz.basha
In-Reply-To: <20180425182301.GA13840@felix-thinkpad.cavium.com>

From: Intiyaz Basha <intiyaz.basha@cavium.com>

Moved common function if_cfg_callback to lio_core.c
and renamed it to lio_if_cfg_callback.

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_core.c    | 32 ++++++++++++++++++++
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 35 +---------------------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 34 +--------------------
 .../net/ethernet/cavium/liquidio/octeon_network.h  |  4 +++
 4 files changed, 38 insertions(+), 67 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 2a94eee..66fc375 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -29,6 +29,38 @@
 /* OOM task polling interval */
 #define LIO_OOM_POLL_INTERVAL_MS 250
 
+/**
+ * \brief Callback for getting interface configuration
+ * @param status status of request
+ * @param buf pointer to resp structure
+ */
+void lio_if_cfg_callback(struct octeon_device *oct,
+			 u32 status __attribute__((unused)), void *buf)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+	struct liquidio_if_cfg_context *ctx;
+	struct liquidio_if_cfg_resp *resp;
+
+	resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+	ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+	oct = lio_get_device(ctx->octeon_id);
+	if (resp->status)
+		dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
+			CVM_CAST64(resp->status));
+	WRITE_ONCE(ctx->cond, 1);
+
+	snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
+		 resp->cfg_info.liquidio_firmware_version);
+
+	/* This barrier is required to be sure that the response has been
+	 * written fully before waking up the handler
+	 */
+	wmb();
+
+	wake_up_interruptible(&ctx->wc);
+}
+
 int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
 {
 	struct lio *lio = GET_LIO(netdev);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index f3891ae..e78b3d8 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1942,39 +1942,6 @@ static int load_firmware(struct octeon_device *oct)
 }
 
 /**
- * \brief Callback for getting interface configuration
- * @param status status of request
- * @param buf pointer to resp structure
- */
-static void if_cfg_callback(struct octeon_device *oct,
-			    u32 status __attribute__((unused)),
-			    void *buf)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-	struct liquidio_if_cfg_resp *resp;
-	struct liquidio_if_cfg_context *ctx;
-
-	resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-	ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
-
-	oct = lio_get_device(ctx->octeon_id);
-	if (resp->status)
-		dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: 0x%llx (0x%08x)\n",
-			CVM_CAST64(resp->status), status);
-	WRITE_ONCE(ctx->cond, 1);
-
-	snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
-		 resp->cfg_info.liquidio_firmware_version);
-
-	/* This barrier is required to be sure that the response has been
-	 * written fully before waking up the handler
-	 */
-	wmb();
-
-	wake_up_interruptible(&ctx->wc);
-}
-
-/**
  * \brief Poll routine for checking transmit queue status
  * @param work work_struct data structure
  */
@@ -3556,7 +3523,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 					    OPCODE_NIC_IF_CFG, 0,
 					    if_cfg.u64, 0);
 
-		sc->callback = if_cfg_callback;
+		sc->callback = lio_if_cfg_callback;
 		sc->callback_arg = sc;
 		sc->wait_time = 3000;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index f92dfa4..83d8bf6 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -1059,38 +1059,6 @@ static void free_netsgbuf_with_resp(void *buf)
 }
 
 /**
- * \brief Callback for getting interface configuration
- * @param status status of request
- * @param buf pointer to resp structure
- */
-static void if_cfg_callback(struct octeon_device *oct,
-			    u32 status __attribute__((unused)), void *buf)
-{
-	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
-	struct liquidio_if_cfg_context *ctx;
-	struct liquidio_if_cfg_resp *resp;
-
-	resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-	ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
-
-	oct = lio_get_device(ctx->octeon_id);
-	if (resp->status)
-		dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
-			CVM_CAST64(resp->status));
-	WRITE_ONCE(ctx->cond, 1);
-
-	snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
-		 resp->cfg_info.liquidio_firmware_version);
-
-	/* This barrier is required to be sure that the response has been
-	 * written fully before waking up the handler
-	 */
-	wmb();
-
-	wake_up_interruptible(&ctx->wc);
-}
-
-/**
  * \brief Net device open for LiquidIO
  * @param netdev network device
  */
@@ -2156,7 +2124,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 					    OPCODE_NIC_IF_CFG, 0, if_cfg.u64,
 					    0);
 
-		sc->callback = if_cfg_callback;
+		sc->callback = lio_if_cfg_callback;
 		sc->callback_arg = sc;
 		sc->wait_time = 5000;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 4069710..ad5195c 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -197,6 +197,10 @@ int lio_wait_for_clean_oq(struct octeon_device *oct);
  */
 void liquidio_set_ethtool_ops(struct net_device *netdev);
 
+void lio_if_cfg_callback(struct octeon_device *oct,
+			 u32 status __attribute__((unused)),
+			 void *buf);
+
 /**
  * \brief Net device change_mtu
  * @param netdev network device
-- 
2.9.0

^ permalink raw reply related

* [PATCH net-next 2/6] liquidio: Moved common function list_delete_head to octeon_network.h
From: Felix Manlunas @ 2018-04-25 18:23 UTC (permalink / raw)
  To: davem
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	felix.manlunas, intiyaz.basha
In-Reply-To: <20180425182301.GA13840@felix-thinkpad.cavium.com>

From: Intiyaz Basha <intiyaz.basha@cavium.com>

Moved common function list_delete_head to octeon_network.h
and renamed it to lio_list_delete_head

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 23 ++------------------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 25 +++-------------------
 .../net/ethernet/cavium/liquidio/octeon_network.h  | 19 ++++++++++++++++
 3 files changed, 24 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index e78b3d8..8b3ab98 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -542,25 +542,6 @@ static inline int check_txq_status(struct lio *lio)
 }
 
 /**
- * Remove the node at the head of the list. The list would be empty at
- * the end of this call if there are no more nodes in the list.
- */
-static inline struct list_head *list_delete_head(struct list_head *root)
-{
-	struct list_head *node;
-
-	if ((root->prev == root) && (root->next == root))
-		node = NULL;
-	else
-		node = root->next;
-
-	if (node)
-		list_del(node);
-
-	return node;
-}
-
-/**
  * \brief Delete gather lists
  * @param lio per-network private data
  */
@@ -578,7 +559,7 @@ static void delete_glists(struct lio *lio)
 	for (i = 0; i < lio->linfo.num_txpciq; i++) {
 		do {
 			g = (struct octnic_gather *)
-				list_delete_head(&lio->glist[i]);
+				lio_list_delete_head(&lio->glist[i]);
 			if (g)
 				kfree(g);
 		} while (g);
@@ -2570,7 +2551,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 		spin_lock(&lio->glist_lock[q_idx]);
 		g = (struct octnic_gather *)
-			list_delete_head(&lio->glist[q_idx]);
+			lio_list_delete_head(&lio->glist[q_idx]);
 		spin_unlock(&lio->glist_lock[q_idx]);
 
 		if (!g) {
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 83d8bf6..7725b3f 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -285,25 +285,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
 };
 
 /**
- * Remove the node at the head of the list. The list would be empty at
- * the end of this call if there are no more nodes in the list.
- */
-static struct list_head *list_delete_head(struct list_head *root)
-{
-	struct list_head *node;
-
-	if ((root->prev == root) && (root->next == root))
-		node = NULL;
-	else
-		node = root->next;
-
-	if (node)
-		list_del(node);
-
-	return node;
-}
-
-/**
  * \brief Delete gather lists
  * @param lio per-network private data
  */
@@ -321,7 +302,7 @@ static void delete_glists(struct lio *lio)
 	for (i = 0; i < lio->linfo.num_txpciq; i++) {
 		do {
 			g = (struct octnic_gather *)
-			    list_delete_head(&lio->glist[i]);
+			    lio_list_delete_head(&lio->glist[i]);
 			kfree(g);
 		} while (g);
 
@@ -1629,8 +1610,8 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 		int i, frags;
 
 		spin_lock(&lio->glist_lock[q_idx]);
-		g = (struct octnic_gather *)list_delete_head(
-		    &lio->glist[q_idx]);
+		g = (struct octnic_gather *)
+			lio_list_delete_head(&lio->glist[q_idx]);
 		spin_unlock(&lio->glist_lock[q_idx]);
 
 		if (!g) {
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index ad5195c..3cb3d72 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -563,4 +563,23 @@ static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
 	return skb->queue_mapping % lio->linfo.num_txpciq;
 }
 
+/**
+ * Remove the node at the head of the list. The list would be empty at
+ * the end of this call if there are no more nodes in the list.
+ */
+static inline struct list_head *lio_list_delete_head(struct list_head *root)
+{
+	struct list_head *node;
+
+	if (root->prev == root && root->next == root)
+		node = NULL;
+	else
+		node = root->next;
+
+	if (node)
+		list_del(node);
+
+	return node;
+}
+
 #endif
-- 
2.9.0

^ permalink raw reply related

* [PATCH net-next 3/6] liquidio: Moved common function delete_glists to lio_core.c
From: Felix Manlunas @ 2018-04-25 18:23 UTC (permalink / raw)
  To: davem
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	felix.manlunas, intiyaz.basha
In-Reply-To: <20180425182301.GA13840@felix-thinkpad.cavium.com>

From: Intiyaz Basha <intiyaz.basha@cavium.com>

Moved common function delete_glists to lio_core.c
and renamed it to lio_delete_glists

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_core.c    | 41 ++++++++++++++++++
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 50 ++--------------------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 49 ++-------------------
 .../net/ethernet/cavium/liquidio/octeon_network.h  |  2 +
 4 files changed, 51 insertions(+), 91 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 66fc375..76aee81 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -61,6 +61,47 @@ void lio_if_cfg_callback(struct octeon_device *oct,
 	wake_up_interruptible(&ctx->wc);
 }
 
+/**
+ * \brief Delete gather lists
+ * @param lio per-network private data
+ */
+void lio_delete_glists(struct lio *lio)
+{
+	struct octnic_gather *g;
+	int i;
+
+	kfree(lio->glist_lock);
+	lio->glist_lock = NULL;
+
+	if (!lio->glist)
+		return;
+
+	for (i = 0; i < lio->linfo.num_txpciq; i++) {
+		do {
+			g = (struct octnic_gather *)
+			    lio_list_delete_head(&lio->glist[i]);
+			kfree(g);
+		} while (g);
+
+		if (lio->glists_virt_base && lio->glists_virt_base[i] &&
+		    lio->glists_dma_base && lio->glists_dma_base[i]) {
+			lio_dma_free(lio->oct_dev,
+				     lio->glist_entry_size * lio->tx_qsize,
+				     lio->glists_virt_base[i],
+				     lio->glists_dma_base[i]);
+		}
+	}
+
+	kfree(lio->glists_virt_base);
+	lio->glists_virt_base = NULL;
+
+	kfree(lio->glists_dma_base);
+	lio->glists_dma_base = NULL;
+
+	kfree(lio->glist);
+	lio->glist = NULL;
+}
+
 int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
 {
 	struct lio *lio = GET_LIO(netdev);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 8b3ab98..afe22b2 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -542,48 +542,6 @@ static inline int check_txq_status(struct lio *lio)
 }
 
 /**
- * \brief Delete gather lists
- * @param lio per-network private data
- */
-static void delete_glists(struct lio *lio)
-{
-	struct octnic_gather *g;
-	int i;
-
-	kfree(lio->glist_lock);
-	lio->glist_lock = NULL;
-
-	if (!lio->glist)
-		return;
-
-	for (i = 0; i < lio->linfo.num_txpciq; i++) {
-		do {
-			g = (struct octnic_gather *)
-				lio_list_delete_head(&lio->glist[i]);
-			if (g)
-				kfree(g);
-		} while (g);
-
-		if (lio->glists_virt_base && lio->glists_virt_base[i] &&
-		    lio->glists_dma_base && lio->glists_dma_base[i]) {
-			lio_dma_free(lio->oct_dev,
-				     lio->glist_entry_size * lio->tx_qsize,
-				     lio->glists_virt_base[i],
-				     lio->glists_dma_base[i]);
-		}
-	}
-
-	kfree(lio->glists_virt_base);
-	lio->glists_virt_base = NULL;
-
-	kfree(lio->glists_dma_base);
-	lio->glists_dma_base = NULL;
-
-	kfree(lio->glist);
-	lio->glist = NULL;
-}
-
-/**
  * \brief Setup gather lists
  * @param lio per-network private data
  */
@@ -617,7 +575,7 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
 				       GFP_KERNEL);
 
 	if (!lio->glists_virt_base || !lio->glists_dma_base) {
-		delete_glists(lio);
+		lio_delete_glists(lio);
 		return -ENOMEM;
 	}
 
@@ -634,7 +592,7 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
 				      &lio->glists_dma_base[i]);
 
 		if (!lio->glists_virt_base[i]) {
-			delete_glists(lio);
+			lio_delete_glists(lio);
 			return -ENOMEM;
 		}
 
@@ -656,7 +614,7 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
 		}
 
 		if (j != lio->tx_qsize) {
-			delete_glists(lio);
+			lio_delete_glists(lio);
 			return -ENOMEM;
 		}
 	}
@@ -1452,7 +1410,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 
 	cleanup_rx_oom_poll_fn(netdev);
 
-	delete_glists(lio);
+	lio_delete_glists(lio);
 
 	free_netdev(netdev);
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 7725b3f..dac2d29 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -285,47 +285,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
 };
 
 /**
- * \brief Delete gather lists
- * @param lio per-network private data
- */
-static void delete_glists(struct lio *lio)
-{
-	struct octnic_gather *g;
-	int i;
-
-	kfree(lio->glist_lock);
-	lio->glist_lock = NULL;
-
-	if (!lio->glist)
-		return;
-
-	for (i = 0; i < lio->linfo.num_txpciq; i++) {
-		do {
-			g = (struct octnic_gather *)
-			    lio_list_delete_head(&lio->glist[i]);
-			kfree(g);
-		} while (g);
-
-		if (lio->glists_virt_base && lio->glists_virt_base[i] &&
-		    lio->glists_dma_base && lio->glists_dma_base[i]) {
-			lio_dma_free(lio->oct_dev,
-				     lio->glist_entry_size * lio->tx_qsize,
-				     lio->glists_virt_base[i],
-				     lio->glists_dma_base[i]);
-		}
-	}
-
-	kfree(lio->glists_virt_base);
-	lio->glists_virt_base = NULL;
-
-	kfree(lio->glists_dma_base);
-	lio->glists_dma_base = NULL;
-
-	kfree(lio->glist);
-	lio->glist = NULL;
-}
-
-/**
  * \brief Setup gather lists
  * @param lio per-network private data
  */
@@ -359,7 +318,7 @@ static int setup_glists(struct lio *lio, int num_iqs)
 				       GFP_KERNEL);
 
 	if (!lio->glists_virt_base || !lio->glists_dma_base) {
-		delete_glists(lio);
+		lio_delete_glists(lio);
 		return -ENOMEM;
 	}
 
@@ -374,7 +333,7 @@ static int setup_glists(struct lio *lio, int num_iqs)
 				      &lio->glists_dma_base[i]);
 
 		if (!lio->glists_virt_base[i]) {
-			delete_glists(lio);
+			lio_delete_glists(lio);
 			return -ENOMEM;
 		}
 
@@ -393,7 +352,7 @@ static int setup_glists(struct lio *lio, int num_iqs)
 		}
 
 		if (j != lio->tx_qsize) {
-			delete_glists(lio);
+			lio_delete_glists(lio);
 			return -ENOMEM;
 		}
 	}
@@ -837,7 +796,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 
 	cleanup_link_status_change_wq(netdev);
 
-	delete_glists(lio);
+	lio_delete_glists(lio);
 
 	free_netdev(netdev);
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 3cb3d72..60db303 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -201,6 +201,8 @@ void lio_if_cfg_callback(struct octeon_device *oct,
 			 u32 status __attribute__((unused)),
 			 void *buf);
 
+void lio_delete_glists(struct lio *lio);
+
 /**
  * \brief Net device change_mtu
  * @param netdev network device
-- 
2.9.0

^ permalink raw reply related

* [PATCH net-next 4/6] liquidio: Moved common definition octnic_gather to octeon_network.h
From: Felix Manlunas @ 2018-04-25 18:23 UTC (permalink / raw)
  To: davem
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	felix.manlunas, intiyaz.basha
In-Reply-To: <20180425182301.GA13840@felix-thinkpad.cavium.com>

From: Intiyaz Basha <intiyaz.basha@cavium.com>

Moving common definition octnic_gather to octeon_network.h

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c     | 21 ---------------------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c  | 18 ------------------
 .../net/ethernet/cavium/liquidio/octeon_network.h   | 21 +++++++++++++++++++++
 3 files changed, 21 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index afe22b2..c0bd489 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -144,27 +144,6 @@ union tx_info {
 #define OCTNIC_GSO_MAX_SIZE                                                    \
 	(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
 
-/** Structure of a node in list of gather components maintained by
- * NIC driver for each network device.
- */
-struct octnic_gather {
-	/** List manipulation. Next and prev pointers. */
-	struct list_head list;
-
-	/** Size of the gather component at sg in bytes. */
-	int sg_size;
-
-	/** Number of bytes that sg was adjusted to make it 8B-aligned. */
-	int adjust;
-
-	/** Gather component that can accommodate max sized fragment list
-	 *  received from the IP layer.
-	 */
-	struct octeon_sg_entry *sg;
-
-	dma_addr_t sg_dma_ptr;
-};
-
 struct handshake {
 	struct completion init;
 	struct completion started;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index dac2d29..8151d11 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -75,24 +75,6 @@ union tx_info {
 #define OCTNIC_GSO_MAX_SIZE \
 		(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
 
-struct octnic_gather {
-	/* List manipulation. Next and prev pointers. */
-	struct list_head list;
-
-	/* Size of the gather component at sg in bytes. */
-	int sg_size;
-
-	/* Number of bytes that sg was adjusted to make it 8B-aligned. */
-	int adjust;
-
-	/* Gather component that can accommodate max sized fragment list
-	 * received from the IP layer.
-	 */
-	struct octeon_sg_entry *sg;
-
-	dma_addr_t sg_dma_ptr;
-};
-
 static int
 liquidio_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 static void liquidio_vf_remove(struct pci_dev *pdev);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 60db303..26961e1 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -47,6 +47,27 @@ struct liquidio_if_cfg_resp {
 	u64 status;
 };
 
+/* Structure of a node in list of gather components maintained by
+ * NIC driver for each network device.
+ */
+struct octnic_gather {
+	/* List manipulation. Next and prev pointers. */
+	struct list_head list;
+
+	/* Size of the gather component at sg in bytes. */
+	int sg_size;
+
+	/* Number of bytes that sg was adjusted to make it 8B-aligned. */
+	int adjust;
+
+	/* Gather component that can accommodate max sized fragment list
+	 * received from the IP layer.
+	 */
+	struct octeon_sg_entry *sg;
+
+	dma_addr_t sg_dma_ptr;
+};
+
 struct oct_nic_stats_resp {
 	u64     rh;
 	struct oct_link_stats stats;
-- 
2.9.0

^ permalink raw reply related

* [PATCH net-next 5/6] liquidio: Moved common function setup_glists to lio_core.c
From: Felix Manlunas @ 2018-04-25 18:23 UTC (permalink / raw)
  To: davem
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	felix.manlunas, intiyaz.basha
In-Reply-To: <20180425182301.GA13840@felix-thinkpad.cavium.com>

From: Intiyaz Basha <intiyaz.basha@cavium.com>

Moved common function setup_glists to lio_core.c
and reamed it to lio_setup_glists

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_core.c    | 83 +++++++++++++++++++++
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 85 +---------------------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 80 +-------------------
 .../net/ethernet/cavium/liquidio/octeon_network.h  |  2 +
 4 files changed, 87 insertions(+), 163 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 76aee81..b4f9275 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -29,6 +29,8 @@
 /* OOM task polling interval */
 #define LIO_OOM_POLL_INTERVAL_MS 250
 
+#define OCTNIC_MAX_SG  MAX_SKB_FRAGS
+
 /**
  * \brief Callback for getting interface configuration
  * @param status status of request
@@ -102,6 +104,87 @@ void lio_delete_glists(struct lio *lio)
 	lio->glist = NULL;
 }
 
+/**
+ * \brief Setup gather lists
+ * @param lio per-network private data
+ */
+int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
+{
+	struct octnic_gather *g;
+	int i, j;
+
+	lio->glist_lock =
+	    kcalloc(num_iqs, sizeof(*lio->glist_lock), GFP_KERNEL);
+	if (!lio->glist_lock)
+		return -ENOMEM;
+
+	lio->glist =
+	    kcalloc(num_iqs, sizeof(*lio->glist), GFP_KERNEL);
+	if (!lio->glist) {
+		kfree(lio->glist_lock);
+		lio->glist_lock = NULL;
+		return -ENOMEM;
+	}
+
+	lio->glist_entry_size =
+		ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+	/* allocate memory to store virtual and dma base address of
+	 * per glist consistent memory
+	 */
+	lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+					GFP_KERNEL);
+	lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+				       GFP_KERNEL);
+
+	if (!lio->glists_virt_base || !lio->glists_dma_base) {
+		lio_delete_glists(lio);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < num_iqs; i++) {
+		int numa_node = dev_to_node(&oct->pci_dev->dev);
+
+		spin_lock_init(&lio->glist_lock[i]);
+
+		INIT_LIST_HEAD(&lio->glist[i]);
+
+		lio->glists_virt_base[i] =
+			lio_dma_alloc(oct,
+				      lio->glist_entry_size * lio->tx_qsize,
+				      &lio->glists_dma_base[i]);
+
+		if (!lio->glists_virt_base[i]) {
+			lio_delete_glists(lio);
+			return -ENOMEM;
+		}
+
+		for (j = 0; j < lio->tx_qsize; j++) {
+			g = kzalloc_node(sizeof(*g), GFP_KERNEL,
+					 numa_node);
+			if (!g)
+				g = kzalloc(sizeof(*g), GFP_KERNEL);
+			if (!g)
+				break;
+
+			g->sg = lio->glists_virt_base[i] +
+				(j * lio->glist_entry_size);
+
+			g->sg_dma_ptr = lio->glists_dma_base[i] +
+					(j * lio->glist_entry_size);
+
+			list_add_tail(&g->list, &lio->glist[i]);
+		}
+
+		if (j != lio->tx_qsize) {
+			lio_delete_glists(lio);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
 int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
 {
 	struct lio *lio = GET_LIO(netdev);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index c0bd489..f414cd7 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -138,8 +138,6 @@ union tx_info {
  * by this structure in the NIC module.
  */
 
-#define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
-
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
 #define OCTNIC_GSO_MAX_SIZE                                                    \
 	(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
@@ -521,87 +519,6 @@ static inline int check_txq_status(struct lio *lio)
 }
 
 /**
- * \brief Setup gather lists
- * @param lio per-network private data
- */
-static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
-{
-	int i, j;
-	struct octnic_gather *g;
-
-	lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
-				  GFP_KERNEL);
-	if (!lio->glist_lock)
-		return -ENOMEM;
-
-	lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
-			     GFP_KERNEL);
-	if (!lio->glist) {
-		kfree(lio->glist_lock);
-		lio->glist_lock = NULL;
-		return -ENOMEM;
-	}
-
-	lio->glist_entry_size =
-		ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
-
-	/* allocate memory to store virtual and dma base address of
-	 * per glist consistent memory
-	 */
-	lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
-					GFP_KERNEL);
-	lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
-				       GFP_KERNEL);
-
-	if (!lio->glists_virt_base || !lio->glists_dma_base) {
-		lio_delete_glists(lio);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < num_iqs; i++) {
-		int numa_node = dev_to_node(&oct->pci_dev->dev);
-
-		spin_lock_init(&lio->glist_lock[i]);
-
-		INIT_LIST_HEAD(&lio->glist[i]);
-
-		lio->glists_virt_base[i] =
-			lio_dma_alloc(oct,
-				      lio->glist_entry_size * lio->tx_qsize,
-				      &lio->glists_dma_base[i]);
-
-		if (!lio->glists_virt_base[i]) {
-			lio_delete_glists(lio);
-			return -ENOMEM;
-		}
-
-		for (j = 0; j < lio->tx_qsize; j++) {
-			g = kzalloc_node(sizeof(*g), GFP_KERNEL,
-					 numa_node);
-			if (!g)
-				g = kzalloc(sizeof(*g), GFP_KERNEL);
-			if (!g)
-				break;
-
-			g->sg = lio->glists_virt_base[i] +
-				(j * lio->glist_entry_size);
-
-			g->sg_dma_ptr = lio->glists_dma_base[i] +
-					(j * lio->glist_entry_size);
-
-			list_add_tail(&g->list, &lio->glist[i]);
-		}
-
-		if (j != lio->tx_qsize) {
-			lio_delete_glists(lio);
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
-/**
  * \brief Print link information
  * @param netdev network device
  */
@@ -3637,7 +3554,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
 		lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
 
-		if (setup_glists(octeon_dev, lio, num_iqueues)) {
+		if (lio_setup_glists(octeon_dev, lio, num_iqueues)) {
 			dev_err(&octeon_dev->pci_dev->dev,
 				"Gather list allocation failed\n");
 			goto setup_nic_dev_fail;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 8151d11..246752a 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -69,8 +69,6 @@ union tx_info {
 	} s;
 };
 
-#define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
-
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
 #define OCTNIC_GSO_MAX_SIZE \
 		(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
@@ -267,82 +265,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
 };
 
 /**
- * \brief Setup gather lists
- * @param lio per-network private data
- */
-static int setup_glists(struct lio *lio, int num_iqs)
-{
-	struct octnic_gather *g;
-	int i, j;
-
-	lio->glist_lock =
-	    kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
-	if (!lio->glist_lock)
-		return -ENOMEM;
-
-	lio->glist =
-	    kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
-	if (!lio->glist) {
-		kfree(lio->glist_lock);
-		lio->glist_lock = NULL;
-		return -ENOMEM;
-	}
-
-	lio->glist_entry_size =
-		ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
-
-	/* allocate memory to store virtual and dma base address of
-	 * per glist consistent memory
-	 */
-	lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
-					GFP_KERNEL);
-	lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
-				       GFP_KERNEL);
-
-	if (!lio->glists_virt_base || !lio->glists_dma_base) {
-		lio_delete_glists(lio);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < num_iqs; i++) {
-		spin_lock_init(&lio->glist_lock[i]);
-
-		INIT_LIST_HEAD(&lio->glist[i]);
-
-		lio->glists_virt_base[i] =
-			lio_dma_alloc(lio->oct_dev,
-				      lio->glist_entry_size * lio->tx_qsize,
-				      &lio->glists_dma_base[i]);
-
-		if (!lio->glists_virt_base[i]) {
-			lio_delete_glists(lio);
-			return -ENOMEM;
-		}
-
-		for (j = 0; j < lio->tx_qsize; j++) {
-			g = kzalloc(sizeof(*g), GFP_KERNEL);
-			if (!g)
-				break;
-
-			g->sg = lio->glists_virt_base[i] +
-				(j * lio->glist_entry_size);
-
-			g->sg_dma_ptr = lio->glists_dma_base[i] +
-					(j * lio->glist_entry_size);
-
-			list_add_tail(&g->list, &lio->glist[i]);
-		}
-
-		if (j != lio->tx_qsize) {
-			lio_delete_glists(lio);
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
-/**
  * \brief Print link information
  * @param netdev network device
  */
@@ -2211,7 +2133,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
 		lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
 
-		if (setup_glists(lio, num_iqueues)) {
+		if (lio_setup_glists(octeon_dev, lio, num_iqueues)) {
 			dev_err(&octeon_dev->pci_dev->dev,
 				"Gather list allocation failed\n");
 			goto setup_nic_dev_fail;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 26961e1..8894889 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -224,6 +224,8 @@ void lio_if_cfg_callback(struct octeon_device *oct,
 
 void lio_delete_glists(struct lio *lio);
 
+int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_qs);
+
 /**
  * \brief Net device change_mtu
  * @param netdev network device
-- 
2.9.0

^ permalink raw reply related

* [PATCH net-next 6/6] liquidio: enhanced ethtool --set-channels feature
From: Felix Manlunas @ 2018-04-25 18:23 UTC (permalink / raw)
  To: davem
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	felix.manlunas, intiyaz.basha
In-Reply-To: <20180425182301.GA13840@felix-thinkpad.cavium.com>

From: Intiyaz Basha <intiyaz.basha@cavium.com>

Enhancing driver to accept max supported queues for ethtool --set-channels

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
---
 .../ethernet/cavium/liquidio/cn23xx_pf_device.c    |   6 +-
 .../ethernet/cavium/liquidio/cn23xx_pf_device.h    |   2 +
 drivers/net/ethernet/cavium/liquidio/lio_core.c    |   4 +-
 drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 263 +++++++++++++++++++--
 drivers/net/ethernet/cavium/liquidio/lio_main.c    |  64 +++--
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c |   8 +-
 .../net/ethernet/cavium/liquidio/liquidio_common.h |   1 +
 .../net/ethernet/cavium/liquidio/octeon_device.c   |  12 +-
 .../net/ethernet/cavium/liquidio/octeon_device.h   |   2 +-
 .../net/ethernet/cavium/liquidio/octeon_network.h  |  12 +-
 10 files changed, 316 insertions(+), 58 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
index bc9861c..929d485 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
@@ -1245,7 +1245,7 @@ static void cn23xx_setup_reg_address(struct octeon_device *oct)
 	    CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num);
 }
 
-static int cn23xx_sriov_config(struct octeon_device *oct)
+int cn23xx_sriov_config(struct octeon_device *oct)
 {
 	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
 	u32 max_rings, total_rings, max_vfs, rings_per_vf;
@@ -1269,8 +1269,8 @@ static int cn23xx_sriov_config(struct octeon_device *oct)
 		break;
 	}
 
-	if (max_rings <= num_present_cpus())
-		num_pf_rings = 1;
+	if (oct->sriov_info.num_pf_rings)
+		num_pf_rings = oct->sriov_info.num_pf_rings;
 	else
 		num_pf_rings = num_present_cpus();
 
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
index 63b3de4..e6f31d0 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
@@ -61,6 +61,8 @@ u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
 
 void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct);
 
+int cn23xx_sriov_config(struct octeon_device *oct);
+
 int cn23xx_fw_loaded(struct octeon_device *oct);
 
 void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index b4f9275..7e24b51 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -78,7 +78,7 @@ void lio_delete_glists(struct lio *lio)
 	if (!lio->glist)
 		return;
 
-	for (i = 0; i < lio->linfo.num_txpciq; i++) {
+	for (i = 0; i < lio->oct_dev->num_iqs; i++) {
 		do {
 			g = (struct octnic_gather *)
 			    lio_list_delete_head(&lio->glist[i]);
@@ -1036,8 +1036,8 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs)
 	int num_ioq_vectors;
 	int irqret, err;
 
-	oct->num_msix_irqs = num_ioqs;
 	if (oct->msix_on) {
+		oct->num_msix_irqs = num_ioqs;
 		if (OCTEON_CN23XX_PF(oct)) {
 			num_interrupts = MAX_IOQ_INTERRUPTS_PER_PF + 1;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 9926a12..7ca246e 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -353,7 +353,14 @@ lio_ethtool_get_channels(struct net_device *dev,
 		rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx);
 		tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx);
 	} else if (OCTEON_CN23XX_PF(oct)) {
-		max_combined = lio->linfo.num_txpciq;
+		if (oct->sriov_info.sriov_enabled) {
+			max_combined = lio->linfo.num_txpciq;
+		} else {
+			struct octeon_config *conf23_pf =
+				CHIP_CONF(oct, cn23xx_pf);
+
+			max_combined = CFG_GET_IQ_MAX_Q(conf23_pf);
+		}
 		combined_count = oct->num_iqs;
 	} else if (OCTEON_CN23XX_VF(oct)) {
 		u64 reg_val = 0ULL;
@@ -417,9 +424,15 @@ lio_irq_reallocate_irqs(struct octeon_device *oct, uint32_t num_ioqs)
 
 	kfree(oct->irq_name_storage);
 	oct->irq_name_storage = NULL;
+
+	if (octeon_allocate_ioq_vector(oct, num_ioqs)) {
+		dev_err(&oct->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
+		return -1;
+	}
+
 	if (octeon_setup_interrupt(oct, num_ioqs)) {
 		dev_info(&oct->pci_dev->dev, "Setup interrupt failed\n");
-		return 1;
+		return -1;
 	}
 
 	/* Enable Octeon device interrupts */
@@ -449,7 +462,16 @@ lio_ethtool_set_channels(struct net_device *dev,
 	combined_count = channel->combined_count;
 
 	if (OCTEON_CN23XX_PF(oct)) {
-		max_combined = channel->max_combined;
+		if (oct->sriov_info.sriov_enabled) {
+			max_combined = lio->linfo.num_txpciq;
+		} else {
+			struct octeon_config *conf23_pf =
+				CHIP_CONF(oct,
+					  cn23xx_pf);
+
+			max_combined =
+				CFG_GET_IQ_MAX_Q(conf23_pf);
+		}
 	} else if (OCTEON_CN23XX_VF(oct)) {
 		u64 reg_val = 0ULL;
 		u64 ctrl = CN23XX_VF_SLI_IQ_PKT_CONTROL64(0);
@@ -477,7 +499,6 @@ lio_ethtool_set_channels(struct net_device *dev,
 	if (lio_reset_queues(dev, combined_count))
 		return -EINVAL;
 
-	lio_irq_reallocate_irqs(oct, combined_count);
 	if (stopped)
 		dev->netdev_ops->ndo_open(dev);
 
@@ -816,12 +837,120 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
 	ering->rx_jumbo_max_pending = 0;
 }
 
+int lio_23xx_reconfigure_queue_count(struct lio *lio)
+{
+	struct octeon_device *oct = lio->oct_dev;
+	struct liquidio_if_cfg_context *ctx;
+	u32 resp_size, ctx_size, data_size;
+	struct liquidio_if_cfg_resp *resp;
+	struct octeon_soft_command *sc;
+	union oct_nic_if_cfg if_cfg;
+	struct lio_version *vdata;
+	u32 ifidx_or_pfnum;
+	int retval;
+	int j;
+
+	resp_size = sizeof(struct liquidio_if_cfg_resp);
+	ctx_size = sizeof(struct liquidio_if_cfg_context);
+	data_size = sizeof(struct lio_version);
+	sc = (struct octeon_soft_command *)
+		octeon_alloc_soft_command(oct, data_size,
+					  resp_size, ctx_size);
+	if (!sc) {
+		dev_err(&oct->pci_dev->dev, "%s: Failed to allocate soft command\n",
+			__func__);
+		return -1;
+	}
+
+	resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+	ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+	vdata = (struct lio_version *)sc->virtdptr;
+
+	vdata->major = cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION);
+	vdata->minor = cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION);
+	vdata->micro = cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION);
+
+	ifidx_or_pfnum = oct->pf_num;
+	WRITE_ONCE(ctx->cond, 0);
+	ctx->octeon_id = lio_get_device_id(oct);
+	init_waitqueue_head(&ctx->wc);
+
+	if_cfg.u64 = 0;
+	if_cfg.s.num_iqueues = oct->sriov_info.num_pf_rings;
+	if_cfg.s.num_oqueues = oct->sriov_info.num_pf_rings;
+	if_cfg.s.base_queue = oct->sriov_info.pf_srn;
+	if_cfg.s.gmx_port_id = oct->pf_num;
+
+	sc->iq_no = 0;
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_QCOUNT_UPDATE, 0,
+				    if_cfg.u64, 0);
+	sc->callback = lio_if_cfg_callback;
+	sc->callback_arg = sc;
+	sc->wait_time = LIO_IFCFG_WAIT_TIME;
+
+	retval = octeon_send_soft_command(oct, sc);
+	if (retval == IQ_SEND_FAILED) {
+		dev_err(&oct->pci_dev->dev,
+			"iq/oq config failed status: %x\n",
+			retval);
+		goto qcount_update_fail;
+	}
+
+	if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
+		dev_err(&oct->pci_dev->dev, "Wait interrupted\n");
+		return -1;
+	}
+
+	retval = resp->status;
+	if (retval) {
+		dev_err(&oct->pci_dev->dev, "iq/oq config failed\n");
+		goto qcount_update_fail;
+	}
+
+	octeon_swap_8B_data((u64 *)(&resp->cfg_info),
+			    (sizeof(struct liquidio_if_cfg_info)) >> 3);
+
+	lio->ifidx = ifidx_or_pfnum;
+	lio->linfo.num_rxpciq = hweight64(resp->cfg_info.iqmask);
+	lio->linfo.num_txpciq = hweight64(resp->cfg_info.iqmask);
+	for (j = 0; j < lio->linfo.num_rxpciq; j++) {
+		lio->linfo.rxpciq[j].u64 =
+			resp->cfg_info.linfo.rxpciq[j].u64;
+	}
+
+	for (j = 0; j < lio->linfo.num_txpciq; j++) {
+		lio->linfo.txpciq[j].u64 =
+			resp->cfg_info.linfo.txpciq[j].u64;
+	}
+
+	lio->linfo.hw_addr = resp->cfg_info.linfo.hw_addr;
+	lio->linfo.gmxport = resp->cfg_info.linfo.gmxport;
+	lio->linfo.link.u64 = resp->cfg_info.linfo.link.u64;
+	lio->txq = lio->linfo.txpciq[0].s.q_no;
+	lio->rxq = lio->linfo.rxpciq[0].s.q_no;
+
+	octeon_free_soft_command(oct, sc);
+	dev_info(&oct->pci_dev->dev, "Queue count updated to %d\n",
+		 lio->linfo.num_rxpciq);
+
+	return 0;
+
+qcount_update_fail:
+	octeon_free_soft_command(oct, sc);
+
+	return -1;
+}
+
 static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
+	int i, queue_count_update = 0;
 	struct napi_struct *napi, *n;
-	int i, update = 0;
+	int ret;
+
+	schedule_timeout_uninterruptible(msecs_to_jiffies(100));
 
 	if (wait_for_pending_requests(oct))
 		dev_err(&oct->pci_dev->dev, "There were pending requests\n");
@@ -830,7 +959,7 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
 		dev_err(&oct->pci_dev->dev, "IQ had pending instructions\n");
 
 	if (octeon_set_io_queues_off(oct)) {
-		dev_err(&oct->pci_dev->dev, "setting io queues off failed\n");
+		dev_err(&oct->pci_dev->dev, "Setting io queues off failed\n");
 		return -1;
 	}
 
@@ -843,9 +972,40 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
 		netif_napi_del(napi);
 
 	if (num_qs != oct->num_iqs) {
-		netif_set_real_num_rx_queues(netdev, num_qs);
-		netif_set_real_num_tx_queues(netdev, num_qs);
-		update = 1;
+		ret = netif_set_real_num_rx_queues(netdev, num_qs);
+		if (ret) {
+			dev_err(&oct->pci_dev->dev,
+				"Setting real number rx failed\n");
+			return ret;
+		}
+
+		ret = netif_set_real_num_tx_queues(netdev, num_qs);
+		if (ret) {
+			dev_err(&oct->pci_dev->dev,
+				"Setting real number tx failed\n");
+			return ret;
+		}
+
+		/* The value of queue_count_update decides whether it is the
+		 * queue count or the descriptor count that is being
+		 * re-configured.
+		 */
+		queue_count_update = 1;
+	}
+
+	/* Re-configuration of queues can happen in two scenarios, SRIOV enabled
+	 * and SRIOV disabled. Few things like recreating queue zero, resetting
+	 * glists and IRQs are required for both. For the latter, some more
+	 * steps like updating sriov_info for the octeon device need to be done.
+	 */
+	if (queue_count_update) {
+		lio_delete_glists(lio);
+
+		/* Delete mbox for PF which is SRIOV disabled because sriov_info
+		 * will be now changed.
+		 */
+		if ((OCTEON_CN23XX_PF(oct)) && !oct->sriov_info.sriov_enabled)
+			oct->fn_list.free_mbox(oct);
 	}
 
 	for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
@@ -860,24 +1020,91 @@ static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
 		octeon_delete_instr_queue(oct, i);
 	}
 
+	if (queue_count_update) {
+		/* For PF re-configure sriov related information */
+		if ((OCTEON_CN23XX_PF(oct)) &&
+		    !oct->sriov_info.sriov_enabled) {
+			oct->sriov_info.num_pf_rings = num_qs;
+			if (cn23xx_sriov_config(oct)) {
+				dev_err(&oct->pci_dev->dev,
+					"Queue reset aborted: SRIOV config failed\n");
+				return -1;
+			}
+
+			num_qs = oct->sriov_info.num_pf_rings;
+		}
+	}
+
 	if (oct->fn_list.setup_device_regs(oct)) {
 		dev_err(&oct->pci_dev->dev, "Failed to configure device registers\n");
 		return -1;
 	}
 
-	if (liquidio_setup_io_queues(oct, 0, num_qs, num_qs)) {
-		dev_err(&oct->pci_dev->dev, "IO queues initialization failed\n");
-		return -1;
+	/* The following are needed in case of queue count re-configuration and
+	 * not for descriptor count re-configuration.
+	 */
+	if (queue_count_update) {
+		if (octeon_setup_instr_queues(oct))
+			return -1;
+
+		if (octeon_setup_output_queues(oct))
+			return -1;
+
+		/* Recreating mbox for PF that is SRIOV disabled */
+		if (OCTEON_CN23XX_PF(oct) && !oct->sriov_info.sriov_enabled) {
+			if (oct->fn_list.setup_mbox(oct)) {
+				dev_err(&oct->pci_dev->dev, "Mailbox setup failed\n");
+				return -1;
+			}
+		}
+
+		/* Deleting and recreating IRQs whether the interface is SRIOV
+		 * enabled or disabled.
+		 */
+		if (lio_irq_reallocate_irqs(oct, num_qs)) {
+			dev_err(&oct->pci_dev->dev, "IRQs could not be allocated\n");
+			return -1;
+		}
+
+		/* Enable the input and output queues for this Octeon device */
+		if (oct->fn_list.enable_io_queues(oct)) {
+			dev_err(&oct->pci_dev->dev, "Failed to enable input/output queues\n");
+			return -1;
+		}
+
+		for (i = 0; i < oct->num_oqs; i++)
+			writel(oct->droq[i]->max_count,
+			       oct->droq[i]->pkts_credit_reg);
+
+		/* Informing firmware about the new queue count. It is required
+		 * for firmware to allocate more number of queues than those at
+		 * load time.
+		 */
+		if (OCTEON_CN23XX_PF(oct) && !oct->sriov_info.sriov_enabled) {
+			if (lio_23xx_reconfigure_queue_count(lio))
+				return -1;
+		}
 	}
 
-	/* Enable the input and output queues for this Octeon device */
-	if (oct->fn_list.enable_io_queues(oct)) {
-		dev_err(&oct->pci_dev->dev, "Failed to enable input/output queues");
+	/* Once firmware is aware of the new value, queues can be recreated */
+	if (liquidio_setup_io_queues(oct, 0, num_qs, num_qs)) {
+		dev_err(&oct->pci_dev->dev, "I/O queues creation failed\n");
 		return -1;
 	}
 
-	if (update && lio_send_queue_count_update(netdev, num_qs))
-		return -1;
+	if (queue_count_update) {
+		if (lio_setup_glists(oct, lio, num_qs)) {
+			dev_err(&oct->pci_dev->dev, "Gather list allocation failed\n");
+			return -1;
+		}
+
+		/* Send firmware the information about new number of queues
+		 * if the interface is a VF or a PF that is SRIOV enabled.
+		 */
+		if (oct->sriov_info.sriov_enabled || OCTEON_CN23XX_VF(oct))
+			if (lio_send_queue_count_update(netdev, num_qs))
+				return -1;
+	}
 
 	return 0;
 }
@@ -922,7 +1149,7 @@ static int lio_ethtool_set_ringparam(struct net_device *netdev,
 		CFG_SET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(oct), lio->ifidx,
 					    rx_count);
 
-	if (lio_reset_queues(netdev, lio->linfo.num_txpciq))
+	if (lio_reset_queues(netdev, oct->num_iqs))
 		goto err_lio_reset_queues;
 
 	if (stopped)
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index f414cd7..dc801b1 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -497,7 +497,7 @@ static void liquidio_deinit_pci(void)
  */
 static inline int check_txq_status(struct lio *lio)
 {
-	int numqs = lio->netdev->num_tx_queues;
+	int numqs = lio->netdev->real_num_tx_queues;
 	int ret_val = 0;
 	int q, iq;
 
@@ -1521,7 +1521,7 @@ static void free_netsgbuf(void *buf)
 		i++;
 	}
 
-	iq = skb_iq(lio, skb);
+	iq = skb_iq(lio->oct_dev, skb);
 	spin_lock(&lio->glist_lock[iq]);
 	list_add_tail(&g->list, &lio->glist[iq]);
 	spin_unlock(&lio->glist_lock[iq]);
@@ -1564,7 +1564,7 @@ static void free_netsgbuf_with_resp(void *buf)
 		i++;
 	}
 
-	iq = skb_iq(lio, skb);
+	iq = skb_iq(lio->oct_dev, skb);
 
 	spin_lock(&lio->glist_lock[iq]);
 	list_add_tail(&g->list, &lio->glist[iq]);
@@ -1851,11 +1851,6 @@ static int liquidio_open(struct net_device *netdev)
 
 	ifstate_set(lio, LIO_IFSTATE_RUNNING);
 
-	/* Ready for link status updates */
-	lio->intf_open = 1;
-
-	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
-
 	if (OCTEON_CN23XX_PF(oct)) {
 		if (!oct->msix_on)
 			if (setup_tx_poll_fn(netdev))
@@ -1865,7 +1860,12 @@ static int liquidio_open(struct net_device *netdev)
 			return -1;
 	}
 
-	start_txqs(netdev);
+	netif_tx_start_all_queues(netdev);
+
+	/* Ready for link status updates */
+	lio->intf_open = 1;
+
+	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
@@ -1888,11 +1888,15 @@ static int liquidio_stop(struct net_device *netdev)
 
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
-	netif_tx_disable(netdev);
+	/* Stop any link updates */
+	lio->intf_open = 0;
+
+	stop_txqs(netdev);
 
 	/* Inform that netif carrier is down */
 	netif_carrier_off(netdev);
-	lio->intf_open = 0;
+	netif_tx_disable(netdev);
+
 	lio->linfo.link.s.link_up = 0;
 	lio->link_changes++;
 
@@ -2312,7 +2316,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	lio = GET_LIO(netdev);
 	oct = lio->oct_dev;
 
-	q_idx = skb_iq(lio, skb);
+	q_idx = skb_iq(oct, skb);
 	tag = q_idx;
 	iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 
@@ -3278,6 +3282,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 	struct liquidio_if_cfg_resp *resp;
 	struct octdev_props *props;
 	int retval, num_iqueues, num_oqueues;
+	int max_num_queues = 0;
 	union oct_nic_if_cfg if_cfg;
 	unsigned int base_queue;
 	unsigned int gmx_port_id;
@@ -3360,7 +3365,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
 		sc->callback = lio_if_cfg_callback;
 		sc->callback_arg = sc;
-		sc->wait_time = 3000;
+		sc->wait_time = LIO_IFCFG_WAIT_TIME;
 
 		retval = octeon_send_soft_command(octeon_dev, sc);
 		if (retval == IQ_SEND_FAILED) {
@@ -3414,11 +3419,20 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 				resp->cfg_info.oqmask);
 			goto setup_nic_dev_fail;
 		}
+
+		if (OCTEON_CN6XXX(octeon_dev)) {
+			max_num_queues = CFG_GET_IQ_MAX_Q(CHIP_CONF(octeon_dev,
+								    cn6xxx));
+		} else if (OCTEON_CN23XX_PF(octeon_dev)) {
+			max_num_queues = CFG_GET_IQ_MAX_Q(CHIP_CONF(octeon_dev,
+								    cn23xx_pf));
+		}
+
 		dev_dbg(&octeon_dev->pci_dev->dev,
-			"interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d\n",
+			"interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d max_num_queues: %d\n",
 			i, resp->cfg_info.iqmask, resp->cfg_info.oqmask,
-			num_iqueues, num_oqueues);
-		netdev = alloc_etherdev_mq(LIO_SIZE, num_iqueues);
+			num_iqueues, num_oqueues, max_num_queues);
+		netdev = alloc_etherdev_mq(LIO_SIZE, max_num_queues);
 
 		if (!netdev) {
 			dev_err(&octeon_dev->pci_dev->dev, "Device allocation failed\n");
@@ -3433,6 +3447,20 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		netdev->netdev_ops = &lionetdevops;
 		SWITCHDEV_SET_OPS(netdev, &lio_pf_switchdev_ops);
 
+		retval = netif_set_real_num_rx_queues(netdev, num_oqueues);
+		if (retval) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"setting real number rx failed\n");
+			goto setup_nic_dev_fail;
+		}
+
+		retval = netif_set_real_num_tx_queues(netdev, num_iqueues);
+		if (retval) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"setting real number tx failed\n");
+			goto setup_nic_dev_fail;
+		}
+
 		lio = GET_LIO(netdev);
 
 		memset(lio, 0, sizeof(struct lio));
@@ -4053,7 +4081,9 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 		}
 		atomic_set(&octeon_dev->status, OCT_DEV_MBOX_SETUP_DONE);
 
-		if (octeon_allocate_ioq_vector(octeon_dev)) {
+		if (octeon_allocate_ioq_vector
+				(octeon_dev,
+				 octeon_dev->sriov_info.num_pf_rings)) {
 			dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
 			return 1;
 		}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 246752a..4b5ba02 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -849,7 +849,7 @@ static void free_netsgbuf(void *buf)
 		i++;
 	}
 
-	iq = skb_iq(lio, skb);
+	iq = skb_iq(lio->oct_dev, skb);
 
 	spin_lock(&lio->glist_lock[iq]);
 	list_add_tail(&g->list, &lio->glist[iq]);
@@ -893,7 +893,7 @@ static void free_netsgbuf_with_resp(void *buf)
 		i++;
 	}
 
-	iq = skb_iq(lio, skb);
+	iq = skb_iq(lio->oct_dev, skb);
 
 	spin_lock(&lio->glist_lock[iq]);
 	list_add_tail(&g->list, &lio->glist[iq]);
@@ -1392,7 +1392,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	lio = GET_LIO(netdev);
 	oct = lio->oct_dev;
 
-	q_idx = skb_iq(lio, skb);
+	q_idx = skb_iq(lio->oct_dev, skb);
 	tag = q_idx;
 	iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 
@@ -2324,7 +2324,7 @@ static int octeon_device_init(struct octeon_device *oct)
 	}
 	atomic_set(&oct->status, OCT_DEV_MBOX_SETUP_DONE);
 
-	if (octeon_allocate_ioq_vector(oct)) {
+	if (octeon_allocate_ioq_vector(oct, oct->sriov_info.rings_per_vf)) {
 		dev_err(&oct->pci_dev->dev, "ioq vector allocation failed\n");
 		return 1;
 	}
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 34a94da..ba854f1 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -84,6 +84,7 @@ enum octeon_tag_type {
 #define OPCODE_NIC_IF_CFG              0x09
 #define OPCODE_NIC_VF_DRV_NOTICE       0x0A
 #define OPCODE_NIC_INTRMOD_PARAMS      0x0B
+#define OPCODE_NIC_QCOUNT_UPDATE       0x12
 #define OPCODE_NIC_SET_TRUSTED_VF	0x13
 #define OPCODE_NIC_SYNC_OCTEON_TIME	0x14
 #define VF_DRV_LOADED                  1
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index f38abf6..f878a55 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -824,23 +824,18 @@ int octeon_deregister_device(struct octeon_device *oct)
 }
 
 int
-octeon_allocate_ioq_vector(struct octeon_device  *oct)
+octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs)
 {
-	int i, num_ioqs = 0;
 	struct octeon_ioq_vector *ioq_vector;
 	int cpu_num;
 	int size;
-
-	if (OCTEON_CN23XX_PF(oct))
-		num_ioqs = oct->sriov_info.num_pf_rings;
-	else if (OCTEON_CN23XX_VF(oct))
-		num_ioqs = oct->sriov_info.rings_per_vf;
+	int i;
 
 	size = sizeof(struct octeon_ioq_vector) * num_ioqs;
 
 	oct->ioq_vector = vzalloc(size);
 	if (!oct->ioq_vector)
-		return 1;
+		return -1;
 	for (i = 0; i < num_ioqs; i++) {
 		ioq_vector		= &oct->ioq_vector[i];
 		ioq_vector->oct_dev	= oct;
@@ -856,6 +851,7 @@ octeon_allocate_ioq_vector(struct octeon_device  *oct)
 		else
 			ioq_vector->ioq_num	= i;
 	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 91937cc..9430c0a 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -867,7 +867,7 @@ void *oct_get_config_info(struct octeon_device *oct, u16 card_type);
 struct octeon_config *octeon_get_conf(struct octeon_device *oct);
 
 void octeon_free_ioq_vector(struct octeon_device *oct);
-int octeon_allocate_ioq_vector(struct octeon_device  *oct);
+int octeon_allocate_ioq_vector(struct octeon_device  *oct, u32 num_ioqs);
 void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq);
 
 /* LiquidIO driver pivate flags */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 8894889..f3bf635 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -47,6 +47,8 @@ struct liquidio_if_cfg_resp {
 	u64 status;
 };
 
+#define LIO_IFCFG_WAIT_TIME    3000 /* In milli seconds */
+
 /* Structure of a node in list of gather components maintained by
  * NIC driver for each network device.
  */
@@ -544,7 +546,7 @@ static inline void stop_txqs(struct net_device *netdev)
 {
 	int i;
 
-	for (i = 0; i < netdev->num_tx_queues; i++)
+	for (i = 0; i < netdev->real_num_tx_queues; i++)
 		netif_stop_subqueue(netdev, i);
 }
 
@@ -557,7 +559,7 @@ static inline void wake_txqs(struct net_device *netdev)
 	struct lio *lio = GET_LIO(netdev);
 	int i, qno;
 
-	for (i = 0; i < netdev->num_tx_queues; i++) {
+	for (i = 0; i < netdev->real_num_tx_queues; i++) {
 		qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs].s.q_no;
 
 		if (__netif_subqueue_stopped(netdev, i)) {
@@ -578,14 +580,14 @@ static inline void start_txqs(struct net_device *netdev)
 	int i;
 
 	if (lio->linfo.link.s.link_up) {
-		for (i = 0; i < netdev->num_tx_queues; i++)
+		for (i = 0; i < netdev->real_num_tx_queues; i++)
 			netif_start_subqueue(netdev, i);
 	}
 }
 
-static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
+static inline int skb_iq(struct octeon_device *oct, struct sk_buff *skb)
 {
-	return skb->queue_mapping % lio->linfo.num_txpciq;
+	return skb->queue_mapping % oct->num_iqs;
 }
 
 /**
-- 
2.9.0

^ permalink raw reply related

* Re: [PATCH 27/40] rtc/proc: switch to proc_create_single_data
From: Alexandre Belloni @ 2018-04-25 18:25 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-rtc, Alessandro Zummo, jfs-discussion, devel, linux-scsi,
	linux-ide, Greg Kroah-Hartman, linux-kernel, linux-afs,
	linux-acpi, netdev, netfilter-devel, Alexander Viro, Jiri Slaby,
	Andrew Morton, linux-ext4, Alexey Dobriyan, megaraidlinux.pdl,
	drbd-dev
In-Reply-To: <20180425154827.32251-28-hch@lst.de>

On 25/04/2018 17:48:14+0200, Christoph Hellwig wrote:
> And stop trying to get a reference on the submodule, procfs code deals
> with release after and unloaded module and thus removed proc entry.
small typo here        ^

> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>


> ---
>  drivers/rtc/rtc-proc.c | 33 ++-------------------------------
>  1 file changed, 2 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c
> index 31e7e23cc5be..a9dd9218fae2 100644
> --- a/drivers/rtc/rtc-proc.c
> +++ b/drivers/rtc/rtc-proc.c
> @@ -107,40 +107,11 @@ static int rtc_proc_show(struct seq_file *seq, void *offset)
>  	return 0;
>  }
>  
> -static int rtc_proc_open(struct inode *inode, struct file *file)
> -{
> -	int ret;
> -	struct rtc_device *rtc = PDE_DATA(inode);
> -
> -	if (!try_module_get(rtc->owner))
> -		return -ENODEV;
> -
> -	ret = single_open(file, rtc_proc_show, rtc);
> -	if (ret)
> -		module_put(rtc->owner);
> -	return ret;
> -}
> -
> -static int rtc_proc_release(struct inode *inode, struct file *file)
> -{
> -	int res = single_release(inode, file);
> -	struct rtc_device *rtc = PDE_DATA(inode);
> -
> -	module_put(rtc->owner);
> -	return res;
> -}
> -
> -static const struct file_operations rtc_proc_fops = {
> -	.open		= rtc_proc_open,
> -	.read		= seq_read,
> -	.llseek		= seq_lseek,
> -	.release	= rtc_proc_release,
> -};
> -
>  void rtc_proc_add_device(struct rtc_device *rtc)
>  {
>  	if (is_rtc_hctosys(rtc))
> -		proc_create_data("driver/rtc", 0, NULL, &rtc_proc_fops, rtc);
> +		proc_create_single_data("driver/rtc", 0, NULL, rtc_proc_show,
> +				rtc);
>  }
>  
>  void rtc_proc_del_device(struct rtc_device *rtc)
> -- 
> 2.17.0
> 

-- 
Alexandre Belloni, Bootlin (formerly Free Electrons)
Embedded Linux and Kernel engineering
https://bootlin.com

^ permalink raw reply

* [PATCH net] tcp: ignore Fast Open on repair mode
From: Yuchung Cheng @ 2018-04-25 18:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, edumazet, ncardwell, Yuchung Cheng

The TCP repair sequence of operation is to first set the socket in
repair mode, then inject the TCP stats into the socket with repair
socket options, then call connect() to re-activate the socket. The
connect syscall simply returns and set state to ESTABLISHED
mode. As a result Fast Open is meaningless for TCP repair.

However allowing sendto() system call with MSG_FASTOPEN flag half-way
during the repair operation could unexpectedly cause data to be
sent, before the operation finishes changing the internal TCP stats
(e.g. MSS).  This in turn triggers TCP warnings on inconsistent
packet accounting.

The fix is to simply disallow Fast Open operation once the socket
is in the repair mode.

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9ce1c726185e..4b18ad41d4df 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1204,7 +1204,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			uarg->zerocopy = 0;
 	}

-	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
+	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
+	    !tp->repair) {
 		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
 		if (err == -EINPROGRESS && copied_syn > 0)
 			goto out;
-- 
2.17.0.441.gb46fe60e1d-goog

^ permalink raw reply related

* Re: [Cake] [PATCH net-next v3] Add Common Applications Kept Enhanced (cake) qdisc
From: Toke Høiland-Jørgensen @ 2018-04-25 18:34 UTC (permalink / raw)
  To: Eric Dumazet, Jonathan Morton; +Cc: netdev, cake
In-Reply-To: <ca555f33-4d79-2515-e3ec-9c28d2418fba@gmail.com>

Eric Dumazet <eric.dumazet@gmail.com> writes:

> On 04/25/2018 09:52 AM, Jonathan Morton wrote:
>>> We can see here the high cost of forcing software GSO :/
>>>
>>> Really, this should be done only :
>>> 1) If requested by the admin ( tc .... gso ....)
>>>
>>> 2) If packet size is above a threshold.
>>>  The threshold could be set by the admin, and/or based on a fraction of the bandwidth parameter.
>>>
>>> I totally understand why you prefer to segment yourself for < 100 Mbit links.
>>>
>>> But this makes no sense on 10Gbit+
>> 
>> It is absolutely necessary, so far as I can see, to segment GSO
>> superpackets when overhead compensation is selected - as it very
>> often should be, even on pure Ethernet links. Without that, the
>> calculation of link occupancy time will be wrong. (The actual
>> transmission time of an Ethernet frame is rather more than just 14
>> bytes longer than the underlying IP packet.)
>
> Just fix the overhead compensation computation in the code.
>
> skb in a qdisc have everything you need.
>
> qdisc_pkt_len_init() has initialized qdisc_skb_cb(skb)->pkt_len with
> the exact bytes on the wire, and you have gso_segs to perform any
> adjustement you need to do.

The problem is that may not be the right values. For example, in many
CPEs there's a built-in switch that strips VLAN tags before the packet
actually hits the wire. So we do need to be able to get the actual
packet size. Is it possible to get the sizes of the individual segments
of a GSO packet? That way we could do the calculation for the whole
super-packet...

> Do not kill GSO only because you do not want to deal with it.

It's not (just) that we don't want to deal with it, it is also a very
aggressive optimisation for latency, which makes a lot of sense at
residential internet bandwidths.

>> Another reason to apply GSO segmentation is to achieve maximal
>> smoothness of flow isolation. This should still be achievable within
>> some tolerance at high link rates, but calculating this tolerance is
>> complicated by the triple-isolate algorithm.
>> 
>> If there's a way to obtain the individual packet sizes without
>> incurring the full segmentation overhead, it may be worth considering
>> (at high link rates only). I would want to leave it on by default,
>> because some of Cake's demonstrably superior latency performance
>> depends on seeing the real packets, not the aggregates, and the
>> overhead only becomes significant above 100Mbps on weak MIPS CPUs and
>> 1Gbps on vaguely modern x86 stuff.
>
> Again, these arguments are moot on 10Gbit+.
>
> Lets build the future, do not pretend GSO/TSO is not part of it.

I'm all for being future proof, but we also do want well-tested code.
Reworking this feature to function correctly with GSO is going to take
some work. Would you accept gating GSO splitting on bandwidth for now,
and adding correct overhead compensation for GSO packets later?

something like:

if (shaper_active() && (bandwidth <= 1Gbps || overhead_configured))
  split_gso()

-Toke

^ permalink raw reply

* Re: [PATCH] rds: ib: Fix missing call to rds_ib_dev_put in rds_ib_setup_qp
From: David Miller @ 2018-04-25 18:34 UTC (permalink / raw)
  To: dag.moxnes; +Cc: santosh.shilimkar, netdev, linux-rdma, rds-devel, linux-kernel
In-Reply-To: <1524655321-8379-1-git-send-email-dag.moxnes@oracle.com>

From: Dag Moxnes <dag.moxnes@oracle.com>
Date: Wed, 25 Apr 2018 13:22:01 +0200

> The function rds_ib_setup_qp is calling rds_ib_get_client_data and
> should correspondingly call rds_ib_dev_put. This call was lost in
> the non-error path with the introduction of error handling done in
> commit 3b12f73a5c29 ("rds: ib: add error handle")
> 
> Signed-off-by: Dag Moxnes <dag.moxnes@oracle.com>

Applied, thanks.

^ permalink raw reply

* [RFC bpf-next 0/9] bpf: Add helper to do FIB lookups
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern

Provide a helper for doing a FIB and neighbor lookups in the kernel
tables from an XDP program. The helper provides a fastpath for forwarding
packets. If the packet is a local delivery or for any reason is not a
simple lookup and forward, the packet is expected to continue up the stack
for full processing.

Patches 1-6 do some more refactoring to IPv6 with the end goal of
extracting a FIB lookup function that aligns with fib_lookup for IPv4,
basically returning a fib6_info without creating a dst based entry.

Patch 7 adds lookup functions to the ipv6 stub. These are needed since
bpf is built into the kernel and ipv6 may not be built or loaded.

Patch 8 adds the bpf helper and 9 is a sample program.

David Ahern (9):
  net/ipv6: Rename fib6_lookup to fib6_node_lookup
  net/ipv6: Rename rt6_multipath_select
  net/ipv6: Extract table lookup from ip6_pol_route
  net/ipv6: Refactor fib6_rule_action
  net/ipv6: Add fib6_lookup
  net/ipv6: Update fib6 tracepoint to take fib6_info
  net/ipv6: Add fib lookup stubs for use in bpf helper
  bpf: Provide helper to do lookups in kernel FIB table
  samples/bpf: Add examples of ipv4 and ipv6 forwarding in XDP

 include/net/addrconf.h                    |  14 ++
 include/net/ip6_fib.h                     |  21 ++-
 include/trace/events/fib6.h               |  14 +-
 include/uapi/linux/bpf.h                  |  68 ++++++++-
 net/core/filter.c                         | 233 ++++++++++++++++++++++++++++++
 net/ipv6/addrconf_core.c                  |  33 ++++-
 net/ipv6/af_inet6.c                       |   6 +-
 net/ipv6/fib6_rules.c                     | 135 ++++++++++++++---
 net/ipv6/ip6_fib.c                        |  12 +-
 net/ipv6/route.c                          |  76 +++++-----
 samples/bpf/Makefile                      |   4 +
 samples/bpf/xdp_fwd_kern.c                | 110 ++++++++++++++
 samples/bpf/xdp_fwd_user.c                | 136 +++++++++++++++++
 tools/testing/selftests/bpf/bpf_helpers.h |   3 +
 14 files changed, 794 insertions(+), 71 deletions(-)
 create mode 100644 samples/bpf/xdp_fwd_kern.c
 create mode 100644 samples/bpf/xdp_fwd_user.c

-- 
2.11.0

^ permalink raw reply

* [RFC bpf-next 1/9] net/ipv6: Rename fib6_lookup to fib6_node_lookup
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern
In-Reply-To: <20180425183449.25134-1-dsahern@gmail.com>

Rename fib6_lookup to fib6_node_lookup to better reflect what it
returns. The fib6_lookup name will be used in a later patch for
an IPv6 equivalent to IPv4's fib_lookup.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h | 6 +++---
 net/ipv6/ip6_fib.c    | 5 +++--
 net/ipv6/route.c      | 8 ++++----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1af450d4e923..5a16630179cb 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -376,9 +376,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   const struct sk_buff *skb,
 				   int flags, pol_lookup_t lookup);
 
-struct fib6_node *fib6_lookup(struct fib6_node *root,
-			      const struct in6_addr *daddr,
-			      const struct in6_addr *saddr);
+struct fib6_node *fib6_node_lookup(struct fib6_node *root,
+				   const struct in6_addr *daddr,
+				   const struct in6_addr *saddr);
 
 struct fib6_node *fib6_locate(struct fib6_node *root,
 			      const struct in6_addr *daddr, int dst_len,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6421c893466e..f6442d6c5603 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1422,8 +1422,9 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
 
 /* called with rcu_read_lock() held
  */
-struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
-			      const struct in6_addr *saddr)
+struct fib6_node *fib6_node_lookup(struct fib6_node *root,
+				   const struct in6_addr *daddr,
+				   const struct in6_addr *saddr)
 {
 	struct fib6_node *fn;
 	struct lookup_args args[] = {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0407bbc5a028..e4082c612fcc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1006,7 +1006,7 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 		pn = rcu_dereference(fn->parent);
 		sn = FIB6_SUBTREE(pn);
 		if (sn && sn != fn)
-			fn = fib6_lookup(sn, NULL, saddr);
+			fn = fib6_node_lookup(sn, NULL, saddr);
 		else
 			fn = pn;
 		if (fn->fn_flags & RTN_RTINFO)
@@ -1059,7 +1059,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 		flags &= ~RT6_LOOKUP_F_IFACE;
 
 	rcu_read_lock();
-	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
 	f6i = rcu_dereference(fn->leaf);
 	if (!f6i) {
@@ -1814,7 +1814,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 	rcu_read_lock();
 
-	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 	saved_fn = fn;
 
 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
@@ -2417,7 +2417,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	 */
 
 	rcu_read_lock();
-	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
 	for_each_fib6_node_rt_rcu(fn) {
 		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
-- 
2.11.0

^ permalink raw reply related

* [RFC bpf-next 2/9] net/ipv6: Rename rt6_multipath_select
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern
In-Reply-To: <20180425183449.25134-1-dsahern@gmail.com>

Rename rt6_multipath_select to fib6_multipath_select and export it.
A later patch wants access to it similar to IPv4's fib_select_path.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h |  5 +++++
 net/ipv6/route.c      | 17 +++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 5a16630179cb..80d76d8dc683 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -376,6 +376,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   const struct sk_buff *skb,
 				   int flags, pol_lookup_t lookup);
 
+struct fib6_info *fib6_multipath_select(const struct net *net,
+					struct fib6_info *match,
+					struct flowi6 *fl6, int oif,
+					const struct sk_buff *skb, int strict);
+
 struct fib6_node *fib6_node_lookup(struct fib6_node *root,
 				   const struct in6_addr *daddr,
 				   const struct in6_addr *saddr);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e4082c612fcc..72c784add7a1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -419,11 +419,11 @@ static bool rt6_check_expired(const struct rt6_info *rt)
 	return false;
 }
 
-static struct fib6_info *rt6_multipath_select(const struct net *net,
-					      struct fib6_info *match,
-					     struct flowi6 *fl6, int oif,
-					     const struct sk_buff *skb,
-					     int strict)
+struct fib6_info *fib6_multipath_select(const struct net *net,
+					struct fib6_info *match,
+					struct flowi6 *fl6, int oif,
+					const struct sk_buff *skb,
+					int strict)
 {
 	struct fib6_info *sibling, *next_sibling;
 
@@ -1068,8 +1068,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 		f6i = rt6_device_match(net, f6i, &fl6->saddr,
 				      fl6->flowi6_oif, flags);
 		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
-			f6i = rt6_multipath_select(net, f6i, fl6,
-						   fl6->flowi6_oif, skb, flags);
+			f6i = fib6_multipath_select(net, f6i, fl6,
+						    fl6->flowi6_oif, skb,
+						    flags);
 	}
 	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
@@ -1823,7 +1824,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 redo_rt6_select:
 	f6i = rt6_select(net, fn, oif, strict);
 	if (f6i->fib6_nsiblings)
-		f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
+		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
 	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
-- 
2.11.0

^ permalink raw reply related

* [RFC bpf-next 3/9] net/ipv6: Extract table lookup from ip6_pol_route
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern
In-Reply-To: <20180425183449.25134-1-dsahern@gmail.com>

ip6_pol_route is used for ingress and egress FIB lookups. Refactor it
moving the table lookup into a separate fib6_table_lookup that can be
invoked separately and export the new function.

ip6_pol_route now calls fib6_table_lookup and uses the result to generate
a dst based rt6_info.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h |  4 ++++
 net/ipv6/route.c      | 39 +++++++++++++++++++++++++--------------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 80d76d8dc683..4f7b8f59ea6d 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -376,6 +376,10 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   const struct sk_buff *skb,
 				   int flags, pol_lookup_t lookup);
 
+/* called with rcu lock held; caller needs to select path */
+struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
+				    int oif, struct flowi6 *fl6, int strict);
+
 struct fib6_info *fib6_multipath_select(const struct net *net,
 					struct fib6_info *match,
 					struct flowi6 *fl6, int oif,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 72c784add7a1..353645ed3d2c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1799,21 +1799,12 @@ void rt6_age_exceptions(struct fib6_info *rt,
 	rcu_read_unlock_bh();
 }
 
-struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
-			       int oif, struct flowi6 *fl6,
-			       const struct sk_buff *skb, int flags)
+/* must be called with rcu lock held */
+struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
+				    int oif, struct flowi6 *fl6, int strict)
 {
 	struct fib6_node *fn, *saved_fn;
 	struct fib6_info *f6i;
-	struct rt6_info *rt;
-	int strict = 0;
-
-	strict |= flags & RT6_LOOKUP_F_IFACE;
-	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
-	if (net->ipv6.devconf_all->forwarding == 0)
-		strict |= RT6_LOOKUP_F_REACHABLE;
-
-	rcu_read_lock();
 
 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 	saved_fn = fn;
@@ -1823,8 +1814,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 redo_rt6_select:
 	f6i = rt6_select(net, fn, oif, strict);
-	if (f6i->fib6_nsiblings)
-		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
 	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
@@ -1837,6 +1826,28 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		}
 	}
 
+	return f6i;
+}
+
+struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+			       int oif, struct flowi6 *fl6,
+			       const struct sk_buff *skb, int flags)
+{
+	struct fib6_info *f6i;
+	struct rt6_info *rt;
+	int strict = 0;
+
+	strict |= flags & RT6_LOOKUP_F_IFACE;
+	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
+	if (net->ipv6.devconf_all->forwarding == 0)
+		strict |= RT6_LOOKUP_F_REACHABLE;
+
+	rcu_read_lock();
+
+	f6i = fib6_table_lookup(net, table, oif, fl6, strict);
+	if (f6i->fib6_nsiblings)
+		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
+
 	if (f6i == net->ipv6.fib6_null_entry) {
 		rt = net->ipv6.ip6_null_entry;
 		rcu_read_unlock();
-- 
2.11.0

^ permalink raw reply related

* [RFC bpf-next 4/9] net/ipv6: Refactor fib6_rule_action
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern
In-Reply-To: <20180425183449.25134-1-dsahern@gmail.com>

Move source address lookup from fib6_rule_action to a helper. It will be
used in a later patch by a second variant for fib6_rule_action.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/fib6_rules.c | 52 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index df113c7b5fc8..d9c0bfd304f0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -96,6 +96,31 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 	return &net->ipv6.ip6_null_entry->dst;
 }
 
+static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags,
+			   struct flowi6 *flp6, const struct net_device *dev)
+{
+	struct fib6_rule *r = (struct fib6_rule *)rule;
+
+	/* If we need to find a source address for this traffic,
+	 * we check the result if it meets requirement of the rule.
+	 */
+	if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+	    r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+		struct in6_addr saddr;
+
+		if (ipv6_dev_get_saddr(net, dev, &flp6->daddr,
+				       rt6_flags2srcprefs(flags), &saddr))
+			return -EAGAIN;
+
+		if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen))
+			return -EAGAIN;
+
+		flp6->saddr = saddr;
+	}
+
+	return 0;
+}
+
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			    int flags, struct fib_lookup_arg *arg)
 {
@@ -134,27 +159,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 
 	rt = lookup(net, table, flp6, arg->lookup_data, flags);
 	if (rt != net->ipv6.ip6_null_entry) {
-		struct fib6_rule *r = (struct fib6_rule *)rule;
-
-		/*
-		 * If we need to find a source address for this traffic,
-		 * we check the result if it meets requirement of the rule.
-		 */
-		if ((rule->flags & FIB_RULE_FIND_SADDR) &&
-		    r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
-			struct in6_addr saddr;
-
-			if (ipv6_dev_get_saddr(net,
-					       ip6_dst_idev(&rt->dst)->dev,
-					       &flp6->daddr,
-					       rt6_flags2srcprefs(flags),
-					       &saddr))
-				goto again;
-			if (!ipv6_prefix_equal(&saddr, &r->src.addr,
-					       r->src.plen))
-				goto again;
-			flp6->saddr = saddr;
-		}
+		err = fib6_rule_saddr(net, rule, flags, flp6,
+				      ip6_dst_idev(&rt->dst)->dev);
+
+		if (err == -EAGAIN)
+			goto again;
+
 		err = rt->dst.error;
 		if (err != -EAGAIN)
 			goto out;
-- 
2.11.0

^ permalink raw reply related

* [RFC bpf-next 5/9] net/ipv6: Add fib6_lookup
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern
In-Reply-To: <20180425183449.25134-1-dsahern@gmail.com>

Add IPv6 equivalent to fib_lookup. Does a fib lookup, including rules,
but returns a FIB entry, fib6_info, rather than a dst based rt6_info.
fib6_lookup is on the order of 40% faster than any of the dst based
lookup methods without custom rules and 25% faster with custom rules.

Since the lookup function has a completely different signature,
fib6_rule_action is split into 2 paths: the existing one is
renamed __fib6_rule_action and a new one for the fib6_info path
is added. fib6_rule_action decides which to call based on the
lookup_ptr. If it is fib6_table_lookup then the new path is taken.

Caller must hold rcu lock as no reference is taken on the returned
fib entry.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h |  6 ++++
 net/ipv6/fib6_rules.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv6/ip6_fib.c    |  7 +++++
 3 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 4f7b8f59ea6d..d920dd00139b 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -376,6 +376,12 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   const struct sk_buff *skb,
 				   int flags, pol_lookup_t lookup);
 
+/* called with rcu lock held; can return error pointer
+ * caller needs to select path
+ */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+			      int flags);
+
 /* called with rcu lock held; caller needs to select path */
 struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
 				    int oif, struct flowi6 *fl6, int strict);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d9c0bfd304f0..47da07aabda8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -60,6 +60,39 @@ unsigned int fib6_rules_seq_read(struct net *net)
 	return fib_rules_seq_read(net, AF_INET6);
 }
 
+/* called with rcu lock held; no reference taken on fib6_info */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+			      int flags)
+{
+	struct fib6_info *f6i;
+	int err;
+
+	if (net->ipv6.fib6_has_custom_rules) {
+		struct fib_lookup_arg arg = {
+			.lookup_ptr = fib6_table_lookup,
+			.lookup_data = &oif,
+			.flags = FIB_LOOKUP_NOREF,
+		};
+
+		l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
+		err = fib_rules_lookup(net->ipv6.fib6_rules_ops,
+				       flowi6_to_flowi(fl6), flags, &arg);
+		if (err)
+			return ERR_PTR(err);
+
+		f6i = arg.result ? : net->ipv6.fib6_null_entry;
+	} else {
+		f6i = fib6_table_lookup(net, net->ipv6.fib6_local_tbl,
+					oif, fl6, flags);
+		if (!f6i || f6i == net->ipv6.fib6_null_entry)
+			f6i = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
+						oif, fl6, flags);
+	}
+
+	return f6i;
+}
+
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   const struct sk_buff *skb,
 				   int flags, pol_lookup_t lookup)
@@ -121,8 +154,45 @@ static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags,
 	return 0;
 }
 
-static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
-			    int flags, struct fib_lookup_arg *arg)
+static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
+				int flags, struct fib_lookup_arg *arg)
+{
+	struct flowi6 *flp6 = &flp->u.ip6;
+	struct net *net = rule->fr_net;
+	struct fib6_table *table;
+	struct fib6_info *f6i;
+	int err = 0, *oif;
+	u32 tb_id;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		return -ENETUNREACH;
+	case FR_ACT_PROHIBIT:
+		return -EACCES;
+	case FR_ACT_BLACKHOLE:
+	default:
+		return -EINVAL;
+	}
+
+	tb_id = fib_rule_get_table(rule, arg);
+	table = fib6_get_table(net, tb_id);
+	if (!table)
+		return -EAGAIN;
+
+	oif = (int *)arg->lookup_data;
+	f6i = fib6_table_lookup(net, table, *oif, flp6, flags);
+	if (f6i != net->ipv6.fib6_null_entry)
+		err = fib6_rule_saddr(net, rule, flags, flp6,
+				      fib6_info_nh_dev(f6i));
+
+	arg->result = f6i;
+	return err;
+}
+
+static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+			      int flags, struct fib_lookup_arg *arg)
 {
 	struct flowi6 *flp6 = &flp->u.ip6;
 	struct rt6_info *rt = NULL;
@@ -182,6 +252,15 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 	return err;
 }
 
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
+{
+	if (arg->lookup_ptr == fib6_table_lookup)
+		return fib6_rule_action_alt(rule, flp, flags, arg);
+
+	return __fib6_rule_action(rule, flp, flags, arg);
+}
+
 static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
 {
 	struct rt6_info *rt = (struct rt6_info *) arg->result;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f6442d6c5603..422ce5ca7b33 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -354,6 +354,13 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 	return &rt->dst;
 }
 
+/* called with rcu lock held; no reference taken on fib6_info */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+			      int flags)
+{
+	return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
+}
+
 static void __net_init fib6_tables_init(struct net *net)
 {
 	fib6_link_table(net, net->ipv6.fib6_main_tbl);
-- 
2.11.0

^ permalink raw reply related

* [RFC bpf-next 6/9] net/ipv6: Update fib6 tracepoint to take fib6_info
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern
In-Reply-To: <20180425183449.25134-1-dsahern@gmail.com>

Similar to IPv4, IPv6 should use the FIB lookup result in the
tracepoint.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/trace/events/fib6.h | 14 +++++++-------
 net/ipv6/route.c            | 14 ++++++--------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index 7e8d48a81b91..1b8d951e3c12 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -12,10 +12,10 @@
 
 TRACE_EVENT(fib6_table_lookup,
 
-	TP_PROTO(const struct net *net, const struct rt6_info *rt,
+	TP_PROTO(const struct net *net, const struct fib6_info *f6i,
 		 struct fib6_table *table, const struct flowi6 *flp),
 
-	TP_ARGS(net, rt, table, flp),
+	TP_ARGS(net, f6i, table, flp),
 
 	TP_STRUCT__entry(
 		__field(	u32,	tb_id		)
@@ -48,20 +48,20 @@ TRACE_EVENT(fib6_table_lookup,
 		in6 = (struct in6_addr *)__entry->dst;
 		*in6 = flp->daddr;
 
-		if (rt->rt6i_idev) {
-			__assign_str(name, rt->rt6i_idev->dev->name);
+		if (f6i->fib6_nh.nh_dev) {
+			__assign_str(name, f6i->fib6_nh.nh_dev);
 		} else {
 			__assign_str(name, "");
 		}
-		if (rt == net->ipv6.ip6_null_entry) {
+		if (f6i == net->ipv6.fib6_null_entry) {
 			struct in6_addr in6_zero = {};
 
 			in6 = (struct in6_addr *)__entry->gw;
 			*in6 = in6_zero;
 
-		} else if (rt) {
+		} else if (f6i) {
 			in6 = (struct in6_addr *)__entry->gw;
-			*in6 = rt->rt6i_gateway;
+			*in6 = f6i->fib6_nh.nh_gw;
 		}
 	),
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 353645ed3d2c..e4163d19d905 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1078,6 +1078,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 			goto restart;
 	}
 
+	trace_fib6_table_lookup(net, f6i, table, fl6);
+
 	/* Search through exception table */
 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
 	if (rt) {
@@ -1096,8 +1098,6 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 
 	rcu_read_unlock();
 
-	trace_fib6_table_lookup(net, rt, table, fl6);
-
 	return rt;
 }
 
@@ -1826,6 +1826,8 @@ struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
 		}
 	}
 
+	trace_fib6_table_lookup(net, f6i, table, fl6);
+
 	return f6i;
 }
 
@@ -1852,7 +1854,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		rt = net->ipv6.ip6_null_entry;
 		rcu_read_unlock();
 		dst_hold(&rt->dst);
-		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
 	}
 
@@ -1863,7 +1864,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			dst_use_noref(&rt->dst, jiffies);
 
 		rcu_read_unlock();
-		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
 			    !(f6i->fib6_flags & RTF_GATEWAY))) {
@@ -1889,9 +1889,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			dst_hold(&uncached_rt->dst);
 		}
 
-		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
 		return uncached_rt;
-
 	} else {
 		/* Get a percpu copy */
 
@@ -1905,7 +1903,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		local_bh_enable();
 		rcu_read_unlock();
-		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
+
 		return pcpu_rt;
 	}
 }
@@ -2483,7 +2481,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 
 	rcu_read_unlock();
 
-	trace_fib6_table_lookup(net, ret, table, fl6);
+	trace_fib6_table_lookup(net, rt, table, fl6);
 	return ret;
 };
 
-- 
2.11.0

^ permalink raw reply related

* [RFC bpf-next 7/9] net/ipv6: Add fib lookup stubs for use in bpf helper
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern
In-Reply-To: <20180425183449.25134-1-dsahern@gmail.com>

Add stubs to retrieve a handle to an IPv6 FIB table, fib6_get_table,
a stub to do a lookup in a specific table, fib6_table_lookup, and
a stub for a full route lookup.

The stubs are needed for core bpf code to handle the case when the
IPv6 module is not builtin.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/addrconf.h   | 14 ++++++++++++++
 net/ipv6/addrconf_core.c | 33 ++++++++++++++++++++++++++++++++-
 net/ipv6/af_inet6.c      |  6 +++++-
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 8312cc25a3af..ff766ab207e0 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -223,6 +223,20 @@ struct ipv6_stub {
 				 const struct in6_addr *addr);
 	int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
 			       struct dst_entry **dst, struct flowi6 *fl6);
+
+	struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
+	struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
+					 struct flowi6 *fl6, int flags);
+	struct fib6_info *(*fib6_table_lookup)(struct net *net,
+					      struct fib6_table *table,
+					      int oif, struct flowi6 *fl6,
+					      int flags);
+	struct fib6_info *(*fib6_multipath_select)(const struct net *net,
+						   struct fib6_info *f6i,
+						   struct flowi6 *fl6, int oif,
+						   const struct sk_buff *skb,
+						   int strict);
+
 	void (*udpv6_encap_enable)(void);
 	void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
 			      const struct in6_addr *solicited_addr,
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 32b564dfd02a..2fe754fd4f5e 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -134,8 +134,39 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
 	return -EAFNOSUPPORT;
 }
 
+static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
+{
+	return NULL;
+}
+
+static struct fib6_info *
+eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table,
+			       int oif, struct flowi6 *fl6, int flags)
+{
+	return NULL;
+}
+
+static struct fib6_info *
+eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+			 int flags)
+{
+	return NULL;
+}
+
+static struct fib6_info *
+eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
+				   struct flowi6 *fl6, int oif,
+				   const struct sk_buff *skb, int strict)
+{
+	return f6i;
+}
+
 const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
-	.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+	.ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
+	.fib6_get_table    = eafnosupport_fib6_get_table,
+	.fib6_table_lookup = eafnosupport_fib6_table_lookup,
+	.fib6_lookup       = eafnosupport_fib6_lookup,
+	.fib6_multipath_select = eafnosupport_fib6_multipath_select,
 };
 EXPORT_SYMBOL_GPL(ipv6_stub);
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 36d622c477b1..c0e8255d50bb 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -887,7 +887,11 @@ static struct pernet_operations inet6_net_ops = {
 static const struct ipv6_stub ipv6_stub_impl = {
 	.ipv6_sock_mc_join = ipv6_sock_mc_join,
 	.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
-	.ipv6_dst_lookup = ip6_dst_lookup,
+	.ipv6_dst_lookup   = ip6_dst_lookup,
+	.fib6_get_table	   = fib6_get_table,
+	.fib6_table_lookup = fib6_table_lookup,
+	.fib6_lookup       = fib6_lookup,
+	.fib6_multipath_select = fib6_multipath_select,
 	.udpv6_encap_enable = udpv6_encap_enable,
 	.ndisc_send_na = ndisc_send_na,
 	.nd_tbl	= &nd_tbl,
-- 
2.11.0

^ permalink raw reply related

* [RFC bpf-next 8/9] bpf: Provide helper to do lookups in kernel FIB table
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern
In-Reply-To: <20180425183449.25134-1-dsahern@gmail.com>

Provide a helper for doing a FIB and neighbor lookup in the kernel
tables from an XDP program. The helper provides a fastpath for forwarding
packets. If the packet is a local delivery or for any reason is not a
simple lookup and forward, the packet continues up the stack.

If it is to be forwarded, the forwarding can be done directly if the
neighbor is already known. If the neighbor does not exist, the first
few packets go up the stack for neighbor resolution. Once resolved, the
xdp program provides the fast path.

On successful lookup the nexthop dmac, current device smac and egress
device index are returned.

The API supports IPv4, IPv6 and MPLS protocols, but only IPv4 and IPv6
are implemented in this patch. The API includes layer 4 parameters if
the XDP program chooses to do deep packet inspection to allow compare
against ACLs implemented as FIB rules.

Header rewrite is left to the XDP program.

The lookup takes 2 flags:
- BPF_FIB_LOOKUP_DIRECT to do a lookup that bypasses FIB rules and goes
  straight to the table associated with the device (expert setting for
  those looking to maximize throughput)

- BPF_FIB_LOOKUP_OUTPUT to do a lookup from the egress perspective.
  Default is an ingress lookup.

Initial performance numbers collected by Jesper, forwarded packets/sec:

       Full stack    XDP FIB lookup    XDP Direct lookup
IPv4   1,947,969       7,074,156          7,415,333
IPv6   1,728,000       6,165,504          7,262,720


Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/uapi/linux/bpf.h |  68 +++++++++++++-
 net/core/filter.c        | 233 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 300 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e6679393b687..82601c132b9f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -10,6 +10,8 @@
 
 #include <linux/types.h>
 #include <linux/bpf_common.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
 
 /* Extended instruction set based on top of classic BPF */
 
@@ -783,6 +785,17 @@ union bpf_attr {
  *     @size: size of 'struct bpf_xfrm_state'
  *     @flags: room for future extensions
  *     Return: 0 on success or negative error
+ *
+ * int bpf_fib_lookup(ctx, params, plen, flags)
+ *     Do a FIB lookup based on given parameters
+ *     @ctx:     pointer to context of type xdp_md
+ *     @params:  pointer to bpf_fib_lookup
+ *     @plen:    size of params argument
+ *     @flags:   u32 bitmask of BPF_FIB_LOOKUP_* flags
+ *     Return: egress device index if packet is to be forwarded,
+ *             0 for local delivery (anything that needs to be handled
+ *             by the full stack), or negative on error.
+ *             If index is > 0, output data in bpf_fib_lookup is set
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -851,7 +864,9 @@ union bpf_attr {
 	FN(msg_pull_data),		\
 	FN(bind),			\
 	FN(xdp_adjust_tail),		\
-	FN(skb_get_xfrm_state),
+	FN(skb_get_xfrm_state),		\
+	FN(fib_lookup),			\
+
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -1255,4 +1270,55 @@ struct bpf_raw_tracepoint_args {
 	__u64 args[0];
 };
 
+/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:  Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT  BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
+
+struct bpf_fib_lookup {
+	/* input */
+	__u8	family;   /* network family, AF_INET, AF_INET6, AF_MPLS */
+
+	/* set if lookup is to consider L4 data - e.g., FIB rules */
+	__u8	l4_protocol;
+	__be16	sport;
+	__be16	dport;
+
+	/* total length of packet from network header - used for MTU check */
+	__u16	tot_len;
+	__u32	ifindex;  /* L3 device index for lookup */
+
+	union {
+		/* inputs to lookup */
+		__u8	tos;		/* AF_INET  */
+		__be32	flowlabel;	/* AF_INET6 */
+
+		/* output: metric of fib result */
+		__u32 rt_metric;
+	};
+
+	union {
+		__be32		mpls_in;
+		__be32		ipv4_src;
+		struct in6_addr	ipv6_src;
+	};
+
+	/* input to bpf_fib_lookup, *dst is destination address.
+	 * output: bpf_fib_lookup sets to gateway address
+	 */
+	union {
+		/* return for MPLS lookups */
+		__be32		mpls_out[4];  /* support up to 4 labels */
+		__be32		ipv4_dst;
+		struct in6_addr	ipv6_dst;
+	};
+
+	/* output */
+	__be16	h_vlan_proto;
+	__be16	h_vlan_TCI;
+	__u8	smac[ETH_ALEN];
+	__u8	dmac[ETH_ALEN];
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e45c6c7ab08..37602b2fb94a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -59,6 +59,10 @@
 #include <net/tcp.h>
 #include <net/xfrm.h>
 #include <linux/bpf_trace.h>
+#include <linux/inetdevice.h>
+#include <net/ip_fib.h>
+#include <net/flow.h>
+#include <net/arp.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -3787,6 +3791,231 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
+static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
+				  const struct neighbour *neigh,
+				  const struct net_device *dev)
+{
+	memcpy(params->dmac, neigh->ha, ETH_ALEN);
+	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
+	params->h_vlan_TCI = 0;
+	params->h_vlan_proto = 0;
+
+	return dev->ifindex;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_INET)
+static int bpf_ipv4_fib_lookup(struct xdp_buff *ctx,
+			       struct bpf_fib_lookup *params, u32 flags)
+{
+	struct net *net = dev_net(ctx->rxq->dev);
+	struct in_device *in_dev;
+	struct neighbour *neigh;
+	struct net_device *dev;
+	struct fib_result res;
+	struct fib_nh *nh;
+	struct flowi4 fl4;
+	int err;
+
+	dev = dev_get_by_index_rcu(net, params->ifindex);
+	if (unlikely(!dev))
+		return -ENODEV;
+
+	/* verify forwarding is enabled on this interface */
+	in_dev = __in_dev_get_rcu(dev);
+	if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
+		return 0;
+
+	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
+		fl4.flowi4_iif = 1;
+		fl4.flowi4_oif = params->ifindex;
+	} else {
+		fl4.flowi4_iif = params->ifindex;
+		fl4.flowi4_oif = 0;
+	}
+	fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
+	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+	fl4.flowi4_flags = 0;
+
+	fl4.flowi4_proto = params->l4_protocol;
+	fl4.daddr = params->ipv4_dst;
+	fl4.saddr = params->ipv4_src;
+	fl4.fl4_sport = params->sport;
+	fl4.fl4_dport = params->dport;
+
+	if (flags & BPF_FIB_LOOKUP_DIRECT) {
+		u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+		struct fib_table *tb;
+
+		tb = fib_get_table(net, tbid);
+		if (unlikely(!tb))
+			return 0;
+
+		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
+	} else {
+		fl4.flowi4_mark = 0;
+		fl4.flowi4_secid = 0;
+		fl4.flowi4_tun_key.tun_id = 0;
+		fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+		err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
+	}
+
+	if (err || res.type != RTN_UNICAST)
+		return 0;
+
+	if (res.fi->fib_nhs > 1)
+		fib_select_path(net, &res, &fl4, NULL);
+
+	nh = &res.fi->fib_nh[res.nh_sel];
+
+	/* do not handle lwt encaps right now */
+	if (nh->nh_lwtstate)
+		return 0;
+
+	dev = nh->nh_dev;
+	if (unlikely(!dev))
+		return 0;
+
+	if (nh->nh_gw)
+		params->ipv4_dst = nh->nh_gw;
+
+	params->rt_metric = res.fi->fib_priority;
+
+	/* xdp and cls_bpf programs are run in RCU-bh so
+	 * rcu_read_lock_bh is not needed here
+	 */
+	neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
+	if (neigh)
+		return bpf_fib_set_fwd_params(params, neigh, dev);
+
+	return 0;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int bpf_ipv6_fib_lookup(struct xdp_buff *ctx,
+			       struct bpf_fib_lookup *params, u32 flags)
+{
+	struct net *net = dev_net(ctx->rxq->dev);
+	struct neighbour *neigh;
+	struct net_device *dev;
+	struct fib6_info *f6i;
+	struct flowi6 fl6;
+	int strict = 0;
+	int oif;
+
+	/* link local addresses are never forwarded */
+	if (rt6_need_strict(&params->ipv6_dst) ||
+	    rt6_need_strict(&params->ipv6_src))
+		return 0;
+
+	dev = dev_get_by_index_rcu(net, params->ifindex);
+	if (unlikely(!dev))
+		return -ENODEV;
+
+	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
+		fl6.flowi6_iif = 1;
+		oif = fl6.flowi6_oif = params->ifindex;
+	} else {
+		oif = fl6.flowi6_iif = params->ifindex;
+		fl6.flowi6_oif = 0;
+		strict = RT6_LOOKUP_F_HAS_SADDR;
+	}
+	fl6.flowlabel = params->flowlabel;
+	fl6.flowi6_scope = 0;
+	fl6.flowi6_flags = 0;
+	fl6.mp_hash = 0;
+
+	fl6.flowi6_proto = params->l4_protocol;
+	fl6.daddr = params->ipv6_dst;
+	fl6.saddr = params->ipv6_src;
+	fl6.fl6_sport = params->sport;
+	fl6.fl6_dport = params->dport;
+
+	if (flags & BPF_FIB_LOOKUP_DIRECT) {
+		u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+		struct fib6_table *tb;
+
+		tb = ipv6_stub->fib6_get_table(net, tbid);
+		if (unlikely(!tb))
+			return 0;
+
+		f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
+	} else {
+		fl6.flowi6_mark = 0;
+		fl6.flowi6_secid = 0;
+		fl6.flowi6_tun_key.tun_id = 0;
+		fl6.flowi6_uid = sock_net_uid(net, NULL);
+
+		f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
+	}
+
+	if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
+		return 0;
+
+	if (unlikely(f6i->fib6_flags & RTF_REJECT ||
+	    f6i->fib6_type != RTN_UNICAST))
+		return 0;
+
+	if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
+		f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
+						       fl6.flowi6_oif, NULL,
+						       strict);
+
+	if (f6i->fib6_nh.nh_lwtstate)
+		return 0;
+
+	if (f6i->fib6_flags & RTF_GATEWAY)
+		params->ipv6_dst = f6i->fib6_nh.nh_gw;
+
+	dev = f6i->fib6_nh.nh_dev;
+	params->rt_metric = f6i->fib6_metric;
+
+	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
+	 * not needed here. Can not use __ipv6_neigh_lookup_noref here
+	 * because we need to get nd_tbl via the stub
+	 */
+	neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
+				      ndisc_hashfn, &params->ipv6_dst, dev);
+	if (neigh)
+		return bpf_fib_set_fwd_params(params, neigh, dev);
+
+	return 0;
+}
+#endif
+
+BPF_CALL_4(bpf_fib_lookup, struct xdp_buff *, ctx,
+	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
+{
+	if (plen < sizeof(*params))
+		return -EINVAL;
+
+	switch (params->family) {
+#if IS_ENABLED(CONFIG_INET)
+	case AF_INET:
+		return bpf_ipv4_fib_lookup(ctx, params, flags);
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		return bpf_ipv6_fib_lookup(ctx, params, flags);
+#endif
+	}
+	return -ENOTSUPP;
+}
+
+static const struct bpf_func_proto bpf_fib_lookup_proto = {
+	.func		= bpf_fib_lookup,
+	.gpl_only	= true,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_PTR_TO_MEM,
+	.arg3_type      = ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -3861,6 +4090,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_socket_cookie_proto;
 	case BPF_FUNC_get_socket_uid:
 		return &bpf_get_socket_uid_proto;
+	case BPF_FUNC_fib_lookup:
+		return &bpf_fib_lookup_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -3957,6 +4188,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_redirect_map_proto;
 	case BPF_FUNC_xdp_adjust_tail:
 		return &bpf_xdp_adjust_tail_proto;
+	case BPF_FUNC_fib_lookup:
+		return &bpf_fib_lookup_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
-- 
2.11.0

^ permalink raw reply related

* [RFC bpf-next 9/9] samples/bpf: Add examples of ipv4 and ipv6 forwarding in XDP
From: David Ahern @ 2018-04-25 18:34 UTC (permalink / raw)
  To: netdev, borkmann, ast
  Cc: shm, roopa, brouer, toke, john.fastabend, David Ahern
In-Reply-To: <20180425183449.25134-1-dsahern@gmail.com>

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 samples/bpf/Makefile                      |   4 +
 samples/bpf/xdp_fwd_kern.c                | 110 ++++++++++++++++++++++++
 samples/bpf/xdp_fwd_user.c                | 136 ++++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/bpf_helpers.h |   3 +
 4 files changed, 253 insertions(+)
 create mode 100644 samples/bpf/xdp_fwd_kern.c
 create mode 100644 samples/bpf/xdp_fwd_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index aa8c392e2e52..c90d2ff74873 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -45,6 +45,7 @@ hostprogs-y += xdp_rxq_info
 hostprogs-y += syscall_tp
 hostprogs-y += cpustat
 hostprogs-y += xdp_adjust_tail
+hostprogs-y += xdp_fwd
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -97,6 +98,7 @@ xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
 cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
 xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
+xdp_fwd-objs := bpf_load.o $(LIBBPF) xdp_fwd_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -151,6 +153,7 @@ always += xdp2skb_meta_kern.o
 always += syscall_tp_kern.o
 always += cpustat_kern.o
 always += xdp_adjust_tail_kern.o
+always += xdp_fwd_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -197,6 +200,7 @@ HOSTLOADLIBES_xdp_rxq_info += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
 HOSTLOADLIBES_cpustat += -lelf
 HOSTLOADLIBES_xdp_adjust_tail += -lelf
+HOSTLOADLIBES_xdp_fwd += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
new file mode 100644
index 000000000000..4012c073fc45
--- /dev/null
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include "bpf_helpers.h"
+
+#define IPV6_FLOWINFO_MASK              cpu_to_be32(0x0FFFFFFF)
+
+struct bpf_map_def SEC("maps") tx_port = {
+	.type = BPF_MAP_TYPE_DEVMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 64,
+};
+
+static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct bpf_fib_lookup fib_params;
+	struct ethhdr *eth = data;
+	int out_index;
+	u16 h_proto;
+	u64 nh_off;
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		return XDP_DROP;
+
+	__builtin_memset(&fib_params, 0, sizeof(fib_params));
+
+	h_proto = eth->h_proto;
+	if (h_proto == htons(ETH_P_IP)) {
+		struct iphdr *iph = data + nh_off;
+
+		if (iph + 1 > data_end)
+			return XDP_DROP;
+
+		fib_params.family	= AF_INET;
+		fib_params.tos		= iph->tos;
+		fib_params.l4_protocol	= iph->protocol;
+		fib_params.sport	= 0;
+		fib_params.dport	= 0;
+		fib_params.tot_len	= ntohs(iph->tot_len);
+		fib_params.ipv4_src	= iph->saddr;
+		fib_params.ipv4_dst	= iph->daddr;
+	} else if (h_proto == htons(ETH_P_IPV6)) {
+		struct ipv6hdr *iph = data + nh_off;
+
+		if (iph + 1 > data_end)
+			return XDP_DROP;
+
+		fib_params.family	= AF_INET6;
+		fib_params.flowlabel	= *(__be32 *)iph & IPV6_FLOWINFO_MASK;
+		fib_params.l4_protocol	= iph->nexthdr;
+		fib_params.sport	= 0;
+		fib_params.dport	= 0;
+		fib_params.tot_len	= ntohs(iph->payload_len);
+		fib_params.ipv6_src	= iph->saddr;
+		fib_params.ipv6_dst	= iph->daddr;
+	} else {
+		return XDP_PASS;
+	}
+
+	fib_params.ifindex = ctx->ingress_ifindex;
+
+	out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
+
+	/* verify egress index has xdp support */
+	// TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with
+	//       cannot pass map_type 14 into func bpf_map_lookup_elem#1:
+	if (out_index > 0) {
+		memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+		memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+		return bpf_redirect_map(&tx_port, out_index, 0);
+	}
+
+	return XDP_PASS;
+}
+
+SEC("xdp_fwd")
+int xdp_fwd_prog(struct xdp_md *ctx)
+{
+	return xdp_fwd_flags(ctx, 0);
+}
+
+SEC("xdp_fwd_direct")
+int xdp_fwd_direct_prog(struct xdp_md *ctx)
+{
+	return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
new file mode 100644
index 000000000000..9c6606f57126
--- /dev/null
+++ b/samples/bpf/xdp_fwd_user.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/limits.h>
+#include <net/if.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <libgen.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+
+static int do_attach(int idx, int fd, const char *name)
+{
+	int err;
+
+	err = bpf_set_link_xdp_fd(idx, fd, 0);
+	if (err < 0)
+		printf("ERROR: failed to attach program to %s\n", name);
+
+	return err;
+}
+
+static int do_detach(int idx, const char *name)
+{
+	int err;
+
+	err = bpf_set_link_xdp_fd(idx, -1, 0);
+	if (err < 0)
+		printf("ERROR: failed to detach program from %s\n", name);
+
+	return err;
+}
+
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: %s [OPTS] interface-list\n"
+		"\nOPTS:\n"
+		"    -d    detach program\n"
+		"    -D    direct table lookups (skip fib rules)\n",
+		prog);
+}
+
+int main(int argc, char **argv)
+{
+	char filename[PATH_MAX];
+	int opt, i, idx, err;
+	int prog_id = 0;
+	int attach = 1;
+	int ret = 0;
+
+	while ((opt = getopt(argc, argv, ":dD")) != -1) {
+		switch (opt) {
+		case 'd':
+			attach = 0;
+			break;
+		case 'D':
+			prog_id = 1;
+			break;
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (optind == argc) {
+		usage(basename(argv[0]));
+		return 1;
+	}
+
+	if (attach) {
+		snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+		if (access(filename, O_RDONLY) < 0) {
+			printf("error accessing file %s: %s\n",
+				filename, strerror(errno));
+			return 1;
+		}
+
+		if (load_bpf_file(filename)) {
+			printf("%s", bpf_log_buf);
+			return 1;
+		}
+
+		if (!prog_fd[prog_id]) {
+			printf("load_bpf_file: %s\n", strerror(errno));
+			return 1;
+		}
+	}
+	if (attach) {
+		for (i = 1; i < 64; ++i)
+			bpf_map_update_elem(map_fd[0], &i, &i, 0);
+	}
+
+	for (i = optind; i < argc; ++i) {
+		idx = if_nametoindex(argv[i]);
+		if (!idx)
+			idx = strtoul(argv[i], NULL, 0);
+
+		if (!idx) {
+			fprintf(stderr, "Invalid arg\n");
+			return 1;
+		}
+		if (!attach) {
+			err = do_detach(idx, argv[i]);
+			if (err)
+				ret = err;
+		} else {
+			err = do_attach(idx, prog_fd[prog_id], argv[i]);
+			if (err)
+				ret = err;
+		}
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 69d7b918e66a..4407766a5950 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -101,6 +101,9 @@ static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
 static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
 				     int size, int flags) =
 	(void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
+			     int plen, __u32 flags) =
+	(void *) BPF_FUNC_fib_lookup;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH net-next v3] Add Common Applications Kept Enhanced (cake) qdisc
From: Toke Høiland-Jørgensen @ 2018-04-25 18:35 UTC (permalink / raw)
  To: Eric Dumazet, netdev; +Cc: cake, Dave Taht
In-Reply-To: <c3412b75-156d-3e2c-0f3f-781f2f8bb042@gmail.com>

Eric Dumazet <eric.dumazet@gmail.com> writes:

> On 04/25/2018 09:17 AM, Toke Høiland-Jørgensen wrote:
>
>> Or am I to interpret that as a hard NAK on having this feature in CAKE
>> (even if we fix the issues you pointed out)?
>
> No strong NACK, as long as the current code is fixed.
>
> Right now, a malicious packet will kill the box or something bad like
> that.

Yeah, that's not good, obviously. I may just pull it out for now and add
it back in a separate patch once we've had time to fix it :)

-Toke

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox