Netdev List

Netdev List
 help / color / mirror / Atom feed

* [patch net-next 0/3] mlxsw: Introduce support for "pass" gact action offloading
From: Jiri Pirko @ 2017-09-25  8:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, idosch, mlxsw, jhs, xiyou.wangcong

From: Jiri Pirko <jiri@mellanox.com>

Very simple patchset adds ability for user to insert filters with "pass"
gact action and offload it. That allows scenarios like this:

$ tc filter add dev enp3s0np19 ingress protocol ip pref 10 flower skip_sw dst_ip 192.168.101.0/24 action drop
$ tc filter add dev enp3s0np19 ingress protocol ip pref 9 flower skip_sw dst_ip 192.168.101.1 action pass

Jiri Pirko (3):
  mlxsw: spectrum_acl: Propagate errors from
    mlxsw_afa_block_jump/continue
  net: sched: introduce helper to identify gact pass action
  mlxsw: spectrum_flower: Offload "ok" termination action

 .../net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c    | 14 ++++++++------
 .../net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h    |  4 ++--
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h             |  6 +++---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c         | 10 +++++-----
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c    |  6 +++++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c      | 10 ++++++++--
 include/net/tc_act/tc_gact.h                               |  5 +++++
 7 files changed, 36 insertions(+), 19 deletions(-)

-- 
2.9.5

^ permalink raw reply

* [patch net-next 1/3] mlxsw: spectrum_acl: Propagate errors from mlxsw_afa_block_jump/continue
From: Jiri Pirko @ 2017-09-25  8:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, idosch, mlxsw, jhs, xiyou.wangcong
In-Reply-To: <20170925085822.1358-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Propagate error instead of doing WARN_ON right away.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 .../net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c    | 14 ++++++++------
 .../net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h    |  4 ++--
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h             |  6 +++---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c         | 10 +++++-----
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c    |  6 +++++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c      |  4 +++-
 6 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index ab3ffe7a..bc55d0e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -399,23 +399,25 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block)
 }
 EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index);
 
-void mlxsw_afa_block_continue(struct mlxsw_afa_block *block)
+int mlxsw_afa_block_continue(struct mlxsw_afa_block *block)
 {
-	if (WARN_ON(block->finished))
-		return;
+	if (block->finished)
+		return -EINVAL;
 	mlxsw_afa_set_goto_set(block->cur_set,
 			       MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0);
 	block->finished = true;
+	return 0;
 }
 EXPORT_SYMBOL(mlxsw_afa_block_continue);
 
-void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id)
+int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id)
 {
-	if (WARN_ON(block->finished))
-		return;
+	if (block->finished)
+		return -EINVAL;
 	mlxsw_afa_set_goto_set(block->cur_set,
 			       MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id);
 	block->finished = true;
+	return 0;
 }
 EXPORT_SYMBOL(mlxsw_afa_block_jump);
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index 501819c..06b0be4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -57,8 +57,8 @@ void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_commit(struct mlxsw_afa_block *block);
 char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block);
 u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block);
-void mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
-void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
+int mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
 int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
 int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index d06f7fe..ae67e60 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -470,9 +470,9 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
 				    enum mlxsw_afk_element element,
 				    const char *key_value,
 				    const char *mask_value, unsigned int len);
-void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
-void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
-				 u16 group_id);
+int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
+int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
+				u16 group_id);
 int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index eede75f..93dcd31 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -378,15 +378,15 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
 				 key_value, mask_value, len);
 }
 
-void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei)
+int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei)
 {
-	mlxsw_afa_block_continue(rulei->act_block);
+	return mlxsw_afa_block_continue(rulei->act_block);
 }
 
-void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
-				 u16 group_id)
+int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
+				u16 group_id)
 {
-	mlxsw_afa_block_jump(rulei->act_block, group_id);
+	return mlxsw_afa_block_jump(rulei->act_block, group_id);
 }
 
 int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index 50b40de..7e8284b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -608,7 +608,10 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
 		goto err_rulei_create;
 	}
 
-	mlxsw_sp_acl_rulei_act_continue(rulei);
+	err = mlxsw_sp_acl_rulei_act_continue(rulei);
+	if (WARN_ON(err))
+		goto err_rulei_act_continue;
+
 	err = mlxsw_sp_acl_rulei_commit(rulei);
 	if (err)
 		goto err_rulei_commit;
@@ -623,6 +626,7 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
 
 err_rule_insert:
 err_rulei_commit:
+err_rulei_act_continue:
 	mlxsw_sp_acl_rulei_destroy(rulei);
 err_rulei_create:
 	parman_item_remove(region->parman, parman_prio, parman_item);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 8aace9a..f1cedcc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -84,7 +84,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 				return PTR_ERR(ruleset);
 
 			group_id = mlxsw_sp_acl_ruleset_group_id(ruleset);
-			mlxsw_sp_acl_rulei_act_jump(rulei, group_id);
+			err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id);
+			if (err)
+				return err;
 		} else if (is_tcf_mirred_egress_redirect(a)) {
 			int ifindex = tcf_mirred_ifindex(a);
 			struct net_device *out_dev;
-- 
2.9.5

^ permalink raw reply related

* [patch net-next 2/3] net: sched: introduce helper to identify gact pass action
From: Jiri Pirko @ 2017-09-25  8:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, idosch, mlxsw, jhs, xiyou.wangcong
In-Reply-To: <20170925085822.1358-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Introduce a helper called is_tcf_gact_pass which could be used to
tell if the action is gact pass or not.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/tc_act/tc_gact.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 41afe1c..d979a0d 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -33,6 +33,11 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act,
 	return false;
 }
 
+static inline bool is_tcf_gact_ok(const struct tc_action *a)
+{
+	return __is_tcf_gact_act(a, TC_ACT_OK, false);
+}
+
 static inline bool is_tcf_gact_shot(const struct tc_action *a)
 {
 	return __is_tcf_gact_act(a, TC_ACT_SHOT, false);
-- 
2.9.5

^ permalink raw reply related

* [patch net-next 3/3] mlxsw: spectrum_flower: Offload "ok" termination action
From: Jiri Pirko @ 2017-09-25  8:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, idosch, mlxsw, jhs, xiyou.wangcong
In-Reply-To: <20170925085822.1358-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

If action is "gact_ok", offload it to HW.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index f1cedcc..2f0e578 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -63,7 +63,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 
 	tcf_exts_to_list(exts, &actions);
 	list_for_each_entry(a, &actions, list) {
-		if (is_tcf_gact_shot(a)) {
+		if (is_tcf_gact_ok(a)) {
+			err = mlxsw_sp_acl_rulei_act_continue(rulei);
+			if (err)
+				return err;
+		} else if (is_tcf_gact_shot(a)) {
 			err = mlxsw_sp_acl_rulei_act_drop(rulei);
 			if (err)
 				return err;
-- 
2.9.5

^ permalink raw reply related

* Re: [patch net-next v2 01/12] fib: notifier: Add VIF add and delete event types
From: Nikolay Aleksandrov @ 2017-09-25  9:20 UTC (permalink / raw)
  To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw, andrew
In-Reply-To: <20170924172212.10096-2-jiri@resnulli.us>

On 24/09/17 20:22, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
> 
> In order for an interface to forward packets according to the kernel
> multicast routing table, it must be configured with a VIF index according
> to the mroute user API. The VIF index is then used to refer to that
> interface in the mroute user API, for example, to set the iif and oifs of
> an MFC entry.
> 
> In order to allow drivers to be aware and offload multicast routes, they
> have to be aware of the VIF add and delete notifications.
> 
> Due to the fact that a specific VIF can be deleted and re-added pointing to
> another netdevice, and the MFC routes that point to it will forward the
> matching packets to the new netdevice, a driver willing to offload MFC
> cache entries must be aware of the VIF add and delete events in addition to
> MFC routes notifications.
> 
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
>  include/net/fib_notifier.h | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
> index 669b971..54cd6b8 100644
> --- a/include/net/fib_notifier.h
> +++ b/include/net/fib_notifier.h
> @@ -20,6 +20,8 @@ enum fib_event_type {
>  	FIB_EVENT_RULE_DEL,
>  	FIB_EVENT_NH_ADD,
>  	FIB_EVENT_NH_DEL,
> +	FIB_EVENT_VIF_ADD,
> +	FIB_EVENT_VIF_DEL,
>  };
>  
>  struct fib_notifier_ops {
> 

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

^ permalink raw reply

* Re: [patch net-next v2 02/12] ipmr: Add reference count to MFC entries
From: Nikolay Aleksandrov @ 2017-09-25  9:27 UTC (permalink / raw)
  To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw, andrew
In-Reply-To: <20170924172212.10096-3-jiri@resnulli.us>

On 24/09/17 20:22, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
> 
> Next commits will introduce MFC notifications through the atomic
> fib_notification chain, thus allowing modules to be aware of MFC entries.
> 
> Due to the fact that modules may need to hold a reference to an MFC entry,
> add reference count to MFC entries to prevent them from being freed while
> these modules use them.
> 
> The reference counting is done only on resolved MFC entries currently.
> 
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> v1->v2:
>  - Add comment for the mfc_cache.mfc_un.res.refcount field, similarly to
>    all other fields in the struct
> ---
>  include/linux/mroute.h | 21 +++++++++++++++++++++
>  net/ipv4/ipmr.c        |  8 +++++---
>  2 files changed, 26 insertions(+), 3 deletions(-)
> 

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

^ permalink raw reply

* Re: [patch net-next v2 03/12] ipmr: Add FIB notification access functions
From: Nikolay Aleksandrov @ 2017-09-25  9:35 UTC (permalink / raw)
  To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw, andrew
In-Reply-To: <20170924172212.10096-4-jiri@resnulli.us>

On 24/09/17 20:22, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
> 
> Make the ipmr module register as a FIB notifier. To do that, implement both
> the ipmr_seq_read and ipmr_dump ops.
> 
> The ipmr_seq_read op returns a sequence counter that is incremented on
> every notification related operation done by the ipmr. To implement that,
> add a sequence counter in the netns_ipv4 struct and increment it whenever a
> new MFC route or VIF are added or deleted. The sequence operations are
> protected by the RTNL lock.
> 
> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
> and sends notifications about them. The entries dump is done under RCU
> where the VIF dump uses the mrt_lock too, as the vif->dev field can change
> under RCU.
> 
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> v1->v2:
>  - Take the mrt_lock when dumping VIF entries.
> ---
>  include/linux/mroute.h   |  15 ++++++
>  include/net/netns/ipv4.h |   3 ++
>  net/ipv4/ipmr.c          | 137 ++++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 153 insertions(+), 2 deletions(-)
> 

LGTM,

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

^ permalink raw reply

* Re: [patch net-next v2 05/12] net: ipmr: Add MFC offload indication
From: Nikolay Aleksandrov @ 2017-09-25  9:36 UTC (permalink / raw)
  To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw, andrew
In-Reply-To: <20170924172212.10096-6-jiri@resnulli.us>

On 24/09/17 20:22, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
> 
> Allow drivers, registered to the fib notification chain indicate whether a
> multicast MFC route is offloaded or not, similarly to unicast routes. The
> indication of whether a route is offloaded is done using the mfc_flags
> field on an mfc_cache struct, and the information is sent to the userspace
> via the RTNetlink interface only.
> 
> Currently, MFC routes are either offloaded or not, thus there is no need to
> add per-VIF offload indication.
> 
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> v1->v2:
>  - Add comment for the MFC_OFFLOAD flag
> ---
>  include/linux/mroute.h | 2 ++
>  net/ipv4/ipmr.c        | 3 +++
>  2 files changed, 5 insertions(+)
> 
> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
> index 54c5cb8..5566580 100644
> --- a/include/linux/mroute.h
> +++ b/include/linux/mroute.h
> @@ -90,9 +90,11 @@ struct mr_table {
>  
>  /* mfc_flags:
>   * MFC_STATIC - the entry was added statically (not by a routing daemon)
> + * MFC_OFFLOAD - the entry was offloaded to the hardware
>   */
>  enum {
>  	MFC_STATIC = BIT(0),
> +	MFC_OFFLOAD = BIT(1),
>  };
>  
>  struct mfc_cache_cmp_arg {
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index ba71bc4..2a795d2 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -2268,6 +2268,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
>  	    nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
>  		return -EMSGSIZE;
>  
> +	if (c->mfc_flags & MFC_OFFLOAD)
> +		rtm->rtm_flags |= RTNH_F_OFFLOAD;
> +
>  	if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
>  		return -EMSGSIZE;
>  
> 

Thanks!

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

^ permalink raw reply

* Re: [patch net-next v2 06/12] net: mroute: Check if rule is a default rule
From: Nikolay Aleksandrov @ 2017-09-25  9:38 UTC (permalink / raw)
  To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw, andrew
In-Reply-To: <20170924172212.10096-7-jiri@resnulli.us>

On 24/09/17 20:22, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
> 
> When the ipmr starts, it adds one default FIB rule that matches all packets
> and sends them to the DEFAULT (multicast) FIB table. A more complex rule
> can be added by user to specify that for a specific interface, a packet
> should be look up at either an arbitrary table or according to the l3mdev
> of the interface.
> 
> For drivers willing to offload the ipmr logic into a hardware but don't
> want to offload all the FIB rules functionality, provide a function that
> can indicate whether the FIB rule is the default multicast rule, thus only
> one routing table is needed.
> 
> This way, a driver can register to the FIB notification chain, get
> notifications about FIB rules added and trigger some kind of an internal
> abort mechanism when a non default rule is added by the user.
> 
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
>  include/linux/mroute.h |  7 +++++++
>  net/ipv4/ipmr.c        | 10 ++++++++++
>  2 files changed, 17 insertions(+)
> 

I saw the comment and am fine with the patch either way, so you can
add my:

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

Thanks

^ permalink raw reply

* Re: [patch net-next v2 03/12] ipmr: Add FIB notification access functions
From: Nikolay Aleksandrov @ 2017-09-25  9:40 UTC (permalink / raw)
  To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw, andrew
In-Reply-To: <4c09e09c-d184-0d69-110d-a3d5ba721564@cumulusnetworks.com>

On 25/09/17 12:35, Nikolay Aleksandrov wrote:
> On 24/09/17 20:22, Jiri Pirko wrote:
>> From: Yotam Gigi <yotamg@mellanox.com>
>>
>> Make the ipmr module register as a FIB notifier. To do that, implement both
>> the ipmr_seq_read and ipmr_dump ops.
>>
>> The ipmr_seq_read op returns a sequence counter that is incremented on
>> every notification related operation done by the ipmr. To implement that,
>> add a sequence counter in the netns_ipv4 struct and increment it whenever a
>> new MFC route or VIF are added or deleted. The sequence operations are
>> protected by the RTNL lock.
>>
>> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
>> and sends notifications about them. The entries dump is done under RCU
>> where the VIF dump uses the mrt_lock too, as the vif->dev field can change
>> under RCU.
>>
>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>> v1->v2:
>>  - Take the mrt_lock when dumping VIF entries.
>> ---
>>  include/linux/mroute.h   |  15 ++++++
>>  include/net/netns/ipv4.h |   3 ++
>>  net/ipv4/ipmr.c          | 137 ++++++++++++++++++++++++++++++++++++++++++++++-
>>  3 files changed, 153 insertions(+), 2 deletions(-)
>>
> 
> LGTM,
> 
> Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
> 
> 

One note here if you're going to spin another version of the set, you can
consider renaming the call_* functions to either mroute_* or ipmr_* (e.g.
ipmr_call_...). I personally prefer the ipmr prefix.

^ permalink raw reply

* Re: tg3 pxe weirdness
From: Siva Reddy Kallam @ 2017-09-25  9:41 UTC (permalink / raw)
  To: Berend De Schouwer; +Cc: Linux Netdev List

[-- Attachment #1: Type: text/plain, Size: 995 bytes --]

On Fri, Sep 22, 2017 at 9:04 PM, Berend De Schouwer
<berend.de.schouwer@gmail.com> wrote:
> On Fri, 2017-09-22 at 11:51 +0530, Siva Reddy Kallam wrote:
>>
>>
>> Can you please share below details?
>> 1) Model and Manufacturer of the system
>> 2) Linux distro/kernel used?
>
> 4.13.3 gets a little further, but after some more data is transferred
> the tg3 driver still crashes.  This is unfortunately before I've got a
> writeable filesystem.
>
> The last line is:
> tg3 0000:01:00.0: tg3_stop_block timed out, ofs=4c00 enable_bit=2
>
> I've got some ideas to get the full dmesg.
>
> As with the other kernels it works OK on 1Gbps, but not slower
> switches.
I am suspecting with link aware mode, the clock speed could be slow
and boot code does not
complete within the expected time with lower link speeds. So,
Providing a patch to override clock.
Can you please try with attached debug patch and provide us the
feedback with 100M link?
If it solves this issue, we will work on proper changes.

[-- Attachment #2: 0001-tg3-Add-clock-override-support-for-5762.patch --]
[-- Type: application/octet-stream, Size: 2064 bytes --]

From 770bc346fd5d4380c3572b8e313c3d2714227069 Mon Sep 17 00:00:00 2001
From: Siva Reddy Kallam <siva.kallam@broadcom.com>
Date: Mon, 25 Sep 2017 12:37:05 +0530
Subject: [PATCH] tg3: Add clock override support for 5762

This patch add the override support for 5762

Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
---
 drivers/net/ethernet/broadcom/tg3.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index d8d5f20..cde6244 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -9009,6 +9009,11 @@ static void tg3_override_clk(struct tg3 *tp)
 	case ASIC_REV_5720:
 		tw32(TG3_CPMU_CLCK_ORIDE, CPMU_CLCK_ORIDE_MAC_ORIDE_EN);
 		break;
+	case ASIC_REV_5762:
+		val = tr32(TG3_CPMU_CLCK_ORIDE_ENABLE);
+		tw32(TG3_CPMU_CLCK_ORIDE_ENABLE, val |
+		     TG3_CPMU_MAC_ORIDE_ENABLE);
+		break;
 
 	default:
 		return;
@@ -9131,13 +9136,6 @@ static int tg3_chip_reset(struct tg3 *tp)
 		     tr32(GRC_VCPU_EXT_CTRL) & ~GRC_VCPU_EXT_CTRL_HALT_CPU);
 	}
 
-	/* Set the clock to the highest frequency to avoid timeouts. With link
-	 * aware mode, the clock speed could be slow and bootcode does not
-	 * complete within the expected time. Override the clock to allow the
-	 * bootcode to finish sooner and then restore it.
-	 */
-	tg3_override_clk(tp);
-
 	/* Manage gphy power for all CPMU absent PCIe devices. */
 	if (tg3_flag(tp, 5705_PLUS) && !tg3_flag(tp, CPMU_PRESENT))
 		val |= GRC_MISC_CFG_KEEP_GPHY_POWER;
@@ -9235,6 +9233,13 @@ static int tg3_chip_reset(struct tg3 *tp)
 	if (err)
 		return err;
 
+	/* Set the clock to the highest frequency to avoid timeouts. With link
+	 * aware mode, the clock speed could be slow and bootcode does not
+	 * complete within the expected time. Override the clock to allow the
+	 * bootcode to finish sooner and then restore it.
+	 */
+	tg3_override_clk(tp);
+
 	tw32(GRC_MODE, tp->grc_mode);
 
 	if (tg3_chip_rev_id(tp) == CHIPREV_ID_5705_A0) {
-- 
1.8.3.1


^ permalink raw reply related

* Re: [patch net-next v2 06/12] net: mroute: Check if rule is a default rule
From: Jiri Pirko @ 2017-09-25  9:45 UTC (permalink / raw)
  To: Yunsheng Lin; +Cc: netdev, davem, yotamg, idosch, mlxsw, nikolay, andrew
In-Reply-To: <93dcea9d-94eb-39cf-4102-e5c59fb2dfa5@huawei.com>

Mon, Sep 25, 2017 at 03:28:21AM CEST, linyunsheng@huawei.com wrote:
>Hi, Jiri
>
>On 2017/9/25 1:22, Jiri Pirko wrote:
>> From: Yotam Gigi <yotamg@mellanox.com>
>> 
>> When the ipmr starts, it adds one default FIB rule that matches all packets
>> and sends them to the DEFAULT (multicast) FIB table. A more complex rule
>> can be added by user to specify that for a specific interface, a packet
>> should be look up at either an arbitrary table or according to the l3mdev
>> of the interface.
>> 
>> For drivers willing to offload the ipmr logic into a hardware but don't
>> want to offload all the FIB rules functionality, provide a function that
>> can indicate whether the FIB rule is the default multicast rule, thus only
>> one routing table is needed.
>> 
>> This way, a driver can register to the FIB notification chain, get
>> notifications about FIB rules added and trigger some kind of an internal
>> abort mechanism when a non default rule is added by the user.
>> 
>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>>  include/linux/mroute.h |  7 +++++++
>>  net/ipv4/ipmr.c        | 10 ++++++++++
>>  2 files changed, 17 insertions(+)
>> 
>> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
>> index 5566580..b072a84 100644
>> --- a/include/linux/mroute.h
>> +++ b/include/linux/mroute.h
>> @@ -5,6 +5,7 @@
>>  #include <linux/pim.h>
>>  #include <linux/rhashtable.h>
>>  #include <net/sock.h>
>> +#include <net/fib_rules.h>
>>  #include <net/fib_notifier.h>
>>  #include <uapi/linux/mroute.h>
>>  
>> @@ -19,6 +20,7 @@ int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
>>  int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
>>  int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
>>  int ip_mr_init(void);
>> +bool ipmr_rule_default(const struct fib_rule *rule);
>>  #else
>>  static inline int ip_mroute_setsockopt(struct sock *sock, int optname,
>>  				       char __user *optval, unsigned int optlen)
>> @@ -46,6 +48,11 @@ static inline int ip_mroute_opt(int opt)
>>  {
>>  	return 0;
>>  }
>> +
>> +static inline bool ipmr_rule_default(const struct fib_rule *rule)
>> +{
>> +	return true;
>> +}
>>  #endif
>>  
>>  struct vif_device {
>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>> index 2a795d2..a714f55 100644
>> --- a/net/ipv4/ipmr.c
>> +++ b/net/ipv4/ipmr.c
>> @@ -320,6 +320,16 @@ static unsigned int ipmr_rules_seq_read(struct net *net)
>>  }
>>  #endif
>>  
>> +bool ipmr_rule_default(const struct fib_rule *rule)
>> +{
>> +#if IS_ENABLED(CONFIG_FIB_RULES)
>> +	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
>> +#else
>> +	return true;
>> +#endif
>
>In patch 02, You have the following, can you do the same for the above?
>+#ifdef CONFIG_IP_MROUTE
>+void ipmr_cache_free(struct mfc_cache *mfc_cache);
>+#else
>+static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
>+{
>+}
>+#endif

I don't believe this is necessary. The solution you described is often
used in headers. But here, I'm ok with the current code.


>
>> +}
>> +EXPORT_SYMBOL(ipmr_rule_default);
>> +
>>  static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
>>  				const void *ptr)
>>  {
>> 
>

^ permalink raw reply

* Re: [patch net-next v2 03/12] ipmr: Add FIB notification access functions
From: Jiri Pirko @ 2017-09-25  9:47 UTC (permalink / raw)
  To: Nikolay Aleksandrov; +Cc: netdev, davem, yotamg, idosch, mlxsw, andrew
In-Reply-To: <bcf097e3-9c6c-b4bd-ac1e-8675ae87510e@cumulusnetworks.com>

Mon, Sep 25, 2017 at 11:40:16AM CEST, nikolay@cumulusnetworks.com wrote:
>On 25/09/17 12:35, Nikolay Aleksandrov wrote:
>> On 24/09/17 20:22, Jiri Pirko wrote:
>>> From: Yotam Gigi <yotamg@mellanox.com>
>>>
>>> Make the ipmr module register as a FIB notifier. To do that, implement both
>>> the ipmr_seq_read and ipmr_dump ops.
>>>
>>> The ipmr_seq_read op returns a sequence counter that is incremented on
>>> every notification related operation done by the ipmr. To implement that,
>>> add a sequence counter in the netns_ipv4 struct and increment it whenever a
>>> new MFC route or VIF are added or deleted. The sequence operations are
>>> protected by the RTNL lock.
>>>
>>> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
>>> and sends notifications about them. The entries dump is done under RCU
>>> where the VIF dump uses the mrt_lock too, as the vif->dev field can change
>>> under RCU.
>>>
>>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>>> ---
>>> v1->v2:
>>>  - Take the mrt_lock when dumping VIF entries.
>>> ---
>>>  include/linux/mroute.h   |  15 ++++++
>>>  include/net/netns/ipv4.h |   3 ++
>>>  net/ipv4/ipmr.c          | 137 ++++++++++++++++++++++++++++++++++++++++++++++-
>>>  3 files changed, 153 insertions(+), 2 deletions(-)
>>>
>> 
>> LGTM,
>> 
>> Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
>> 
>> 
>
>One note here if you're going to spin another version of the set, you can
>consider renaming the call_* functions to either mroute_* or ipmr_* (e.g.
>ipmr_call_...). I personally prefer the ipmr prefix.

The naming scheme in this patch is aligned with the rest of the code.
Please see "call_netdevice_notifiers" for example.
Please feel free to send a patch to chanche them all.

^ permalink raw reply

* Re: [PATCH net-next] net: mvpp2: phylink support
From: Antoine Tenart @ 2017-09-25  9:55 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Antoine Tenart, davem, andrew, gregory.clement, thomas.petazzoni,
	miquel.raynal, nadavh, linux-kernel, mw, stefanc, netdev
In-Reply-To: <20170922110731.GG20805@n2100.armlinux.org.uk>

Hi Russell,

On Fri, Sep 22, 2017 at 12:07:31PM +0100, Russell King - ARM Linux wrote:
> On Thu, Sep 21, 2017 at 03:45:22PM +0200, Antoine Tenart wrote:
> > Convert the PPv2 driver to use phylink, which models the MAC to PHY
> > link. The phylink support is made such a way the GoP link IRQ can still
> > be used: the two modes are incompatible and the GoP link IRQ will be
> > used if no PHY is described in the device tree. This is the same
> > behaviour as before.
> 
> This makes no sense.  The point of phylink is to be able to support SFP
> cages, and SFP cages do not have a PHY described in DT.  So, when you
> want to use phylink because of SFP, you can't, because if you omit
> the PHY the driver avoids using phylink.

Yes that's an issue. However we do need to support the GoP link IRQ
which is also needed in some cases where there is no PHY (and when
phylink cannot be used). What would you propose to differentiate those
two cases: no PHY using phylink, and no PHY using the GoP link IRQ?

> > +static void mvpp2_phylink_validate(struct net_device *dev,
> > +				   unsigned long *supported,
> > +				   struct phylink_link_state *state)
> > +{
> > +	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
> > +
> > +	phylink_set_port_modes(mask);
> > +
> > +	phylink_set(mask, Autoneg);
> > +	phylink_set(mask, Pause);
> > +	phylink_set(mask, Asym_Pause);
> > +
> > +	phylink_set(mask, 10baseT_Half);
> > +	phylink_set(mask, 10baseT_Full);
> > +	phylink_set(mask, 100baseT_Half);
> > +	phylink_set(mask, 100baseT_Full);
> > +	phylink_set(mask, 1000baseT_Half);
> > +	phylink_set(mask, 1000baseT_Full);
> > +	phylink_set(mask, 1000baseX_Full);
> > +	phylink_set(mask, 10000baseKR_Full);
> > +
> > +	bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
> > +	bitmap_and(state->advertising, state->advertising, mask,
> > +		   __ETHTOOL_LINK_MODE_MASK_NBITS);
> > +}
> 
> I don't think you've understood this despite the comments in the header
> file.  What you describe above basically means you don't support any
> kind of copper connection at 10G speeds, or any fiber modes at 10G
> speeds either.
> 
> You need to set 10000baseT_Full for copper, 10000baseSR_Full,
> 10000baseLR_Full, 10000baseLRM_Full etc for fiber.  Had you looked at
> my modifications for Marvell's mvpp2x driver you'd have spotted this...

Sure, I'll add these modes as they are supported as well.

> > +static int mvpp2_phylink_mac_link_state(struct net_device *dev,
> > +					struct phylink_link_state *state)
> > +{
> > +	struct mvpp2_port *port = netdev_priv(dev);
> > +	u32 val;
> > +
> > +	if (!phy_interface_mode_is_rgmii(port->phy_interface) &&
> > +	    port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > +		return 0;
> 
> You're blocking this for 1000base-X and 10G connections, which is not
> correct.  The expectation is that this function returns the current
> MAC state irrespective of the interface mode.

I moved what was already supported in the PPv2 driver and did not
implemented the full set of what is supported. It's not perfect, but it
does move what was already supported.

Any reason not to first move what's already supported to phylink, and
then add more supported modes in separate patches?

> > +static void mvpp2_mac_an_restart(struct net_device *dev)
> > +{
> > +	struct mvpp2_port *port = netdev_priv(dev);
> > +	u32 val;
> > +
> > +	if (!phy_interface_mode_is_rgmii(port->phy_interface) &&
> > +	    port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > +		return;
> 
> This prevents AN restart in 1000base-X mode, which is exactly the
> mode that you need to do this.  SGMII doesn't care, and RGMII doesn't
> have inband AN.

I'll fix that.

> > +	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
> > +	val |= MVPP2_GMAC_IN_BAND_RESTART_AN;
> > +	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
> > +}
> > +
> > +static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
> > +			     const struct phylink_link_state *state)
> > +{
> > +	struct mvpp2_port *port = netdev_priv(dev);
> > +	u32 val;
> > +
> > +	/* disable current port for reconfiguration */
> > +	mvpp2_interrupts_disable(port);
> > +	netif_carrier_off(port->dev);
> > +	mvpp2_port_disable(port);
> > +	phy_power_off(port->comphy);
> > +
> > +	/* comphy reconfiguration */
> > +	port->phy_interface = state->interface;
> > +	mvpp22_comphy_init(port);
> > +
> > +	/* gop/mac reconfiguration */
> > +	mvpp22_gop_init(port);
> > +	mvpp2_port_mii_set(port);
> > +
> > +	if (!phy_interface_mode_is_rgmii(port->phy_interface) &&
> > +	    port->phy_interface != PHY_INTERFACE_MODE_SGMII)
> > +		return;
> 
> Again, 1000base-X is excluded, which will break it.  You do need
> to avoid touching the GMAC for 10G connections however.

Same comment as above.

> > +	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
> > +	val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
> > +		 MVPP2_GMAC_CONFIG_GMII_SPEED |
> > +		 MVPP2_GMAC_CONFIG_FULL_DUPLEX |
> > +		 MVPP2_GMAC_AN_SPEED_EN |
> > +		 MVPP2_GMAC_AN_DUPLEX_EN);
> > +
> > +	if (state->duplex)
> > +		val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
> > +
> > +	if (state->speed == SPEED_1000)
> > +		val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
> > +	else if (state->speed == SPEED_100)
> > +		val |= MVPP2_GMAC_CONFIG_MII_SPEED;
> > +
> > +	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
> 
> You're assuming that this function only sets the current parameters for
> the MAC.  That's incorrect - it also needs to deal with autonegotiation
> for inband AN, such as SGMII and 1000base-X.

OK.

> > +	if (port->priv->hw_version == MVPP21 && port->flags & MVPP2_F_LOOPBACK)
> > +		mvpp2_port_loopback_set(port, state);
> > +}
> > +
> > +static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode)
> > +{
> > +	struct mvpp2_port *port = netdev_priv(dev);
> > +	u32 val;
> > +
> > +	netif_tx_stop_all_queues(dev);
> > +	netif_carrier_off(dev);
> > +	mvpp2_ingress_disable(port);
> > +	mvpp2_egress_disable(port);
> > +
> > +	mvpp2_port_disable(port);
> > +	mvpp2_interrupts_disable(port);
> > +
> > +	if (!phylink_autoneg_inband(mode)) {
> > +		val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
> > +		val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
> > +		val |= MVPP2_GMAC_FORCE_LINK_DOWN;
> > +		writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
> > +	}
> 
> Please explain why you think its necessary to force the link down when
> the link is already down - if there's no media connected, we only
> need to stop the ingress and egress.

Agreed, I'll remove this.

> It's certainly wrong to disable interrupts - how do we end up with
> link status changes reported from the MAC to phylink if interrupts
> have been disabled?

This only disables the vector IRQs used for tx/rx queues. That's already
the default when not using a port.

> You guys know that I have working example code for both mvneta and the
> Marvell PP2x driver.  It probably would help if you looked at those
> examples.

I did used your mvneta phylink patch as an example. I'll have a look at
the other one, it'll be helpful.

Thank you for your thoughtful review.
Antoine

-- 
Antoine Ténart, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply

* Re: [patch net-next v2 03/12] ipmr: Add FIB notification access functions
From: Nikolay Aleksandrov @ 2017-09-25  9:59 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, yotamg, idosch, mlxsw, andrew
In-Reply-To: <20170925094713.GD1899@nanopsycho>

On 25/09/17 12:47, Jiri Pirko wrote:
> Mon, Sep 25, 2017 at 11:40:16AM CEST, nikolay@cumulusnetworks.com wrote:
>> On 25/09/17 12:35, Nikolay Aleksandrov wrote:
>>> On 24/09/17 20:22, Jiri Pirko wrote:
>>>> From: Yotam Gigi <yotamg@mellanox.com>
>>>>
>>>> Make the ipmr module register as a FIB notifier. To do that, implement both
>>>> the ipmr_seq_read and ipmr_dump ops.
>>>>
>>>> The ipmr_seq_read op returns a sequence counter that is incremented on
>>>> every notification related operation done by the ipmr. To implement that,
>>>> add a sequence counter in the netns_ipv4 struct and increment it whenever a
>>>> new MFC route or VIF are added or deleted. The sequence operations are
>>>> protected by the RTNL lock.
>>>>
>>>> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
>>>> and sends notifications about them. The entries dump is done under RCU
>>>> where the VIF dump uses the mrt_lock too, as the vif->dev field can change
>>>> under RCU.
>>>>
>>>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>>>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>>>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>>>> ---
>>>> v1->v2:
>>>>  - Take the mrt_lock when dumping VIF entries.
>>>> ---
>>>>  include/linux/mroute.h   |  15 ++++++
>>>>  include/net/netns/ipv4.h |   3 ++
>>>>  net/ipv4/ipmr.c          | 137 ++++++++++++++++++++++++++++++++++++++++++++++-
>>>>  3 files changed, 153 insertions(+), 2 deletions(-)
>>>>
>>>
>>> LGTM,
>>>
>>> Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
>>>
>>>
>>
>> One note here if you're going to spin another version of the set, you can
>> consider renaming the call_* functions to either mroute_* or ipmr_* (e.g.
>> ipmr_call_...). I personally prefer the ipmr prefix.
> 
> The naming scheme in this patch is aligned with the rest of the code.

Definitely not aligned with the rest of the ipmr code because it does not
have such calls. Its notifications have a prefix which is not call_.

> Please see "call_netdevice_notifiers" for example.

Sure, I don't care that much which style you choose, that's why I wrote
_consider_, since this code is contained within ipmr and is not exported
anywhere.

> Please feel free to send a patch to chanche them all.
> 

Jumping the gun a little bit here. :-)

^ permalink raw reply

* Re: [patch net-next v2 06/12] net: mroute: Check if rule is a default rule
From: Nikolay Aleksandrov @ 2017-09-25 10:02 UTC (permalink / raw)
  To: Jiri Pirko, Yunsheng Lin; +Cc: netdev, davem, yotamg, idosch, mlxsw, andrew
In-Reply-To: <20170925094508.GC1899@nanopsycho>

On 25/09/17 12:45, Jiri Pirko wrote:
> Mon, Sep 25, 2017 at 03:28:21AM CEST, linyunsheng@huawei.com wrote:
>> Hi, Jiri
>>
>> On 2017/9/25 1:22, Jiri Pirko wrote:
>>> From: Yotam Gigi <yotamg@mellanox.com>
>>>
>>> When the ipmr starts, it adds one default FIB rule that matches all packets
>>> and sends them to the DEFAULT (multicast) FIB table. A more complex rule
>>> can be added by user to specify that for a specific interface, a packet
>>> should be look up at either an arbitrary table or according to the l3mdev
>>> of the interface.
>>>
>>> For drivers willing to offload the ipmr logic into a hardware but don't
>>> want to offload all the FIB rules functionality, provide a function that
>>> can indicate whether the FIB rule is the default multicast rule, thus only
>>> one routing table is needed.
>>>
>>> This way, a driver can register to the FIB notification chain, get
>>> notifications about FIB rules added and trigger some kind of an internal
>>> abort mechanism when a non default rule is added by the user.
>>>
>>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>>> ---
>>>  include/linux/mroute.h |  7 +++++++
>>>  net/ipv4/ipmr.c        | 10 ++++++++++
>>>  2 files changed, 17 insertions(+)
>>>
>>> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
>>> index 5566580..b072a84 100644
>>> --- a/include/linux/mroute.h
>>> +++ b/include/linux/mroute.h
>>> @@ -5,6 +5,7 @@
>>>  #include <linux/pim.h>
>>>  #include <linux/rhashtable.h>
>>>  #include <net/sock.h>
>>> +#include <net/fib_rules.h>
>>>  #include <net/fib_notifier.h>
>>>  #include <uapi/linux/mroute.h>
>>>  
>>> @@ -19,6 +20,7 @@ int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
>>>  int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
>>>  int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
>>>  int ip_mr_init(void);
>>> +bool ipmr_rule_default(const struct fib_rule *rule);
>>>  #else
>>>  static inline int ip_mroute_setsockopt(struct sock *sock, int optname,
>>>  				       char __user *optval, unsigned int optlen)
>>> @@ -46,6 +48,11 @@ static inline int ip_mroute_opt(int opt)
>>>  {
>>>  	return 0;
>>>  }
>>> +
>>> +static inline bool ipmr_rule_default(const struct fib_rule *rule)
>>> +{
>>> +	return true;
>>> +}
>>>  #endif
>>>  
>>>  struct vif_device {
>>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>>> index 2a795d2..a714f55 100644
>>> --- a/net/ipv4/ipmr.c
>>> +++ b/net/ipv4/ipmr.c
>>> @@ -320,6 +320,16 @@ static unsigned int ipmr_rules_seq_read(struct net *net)
>>>  }
>>>  #endif
>>>  
>>> +bool ipmr_rule_default(const struct fib_rule *rule)
>>> +{
>>> +#if IS_ENABLED(CONFIG_FIB_RULES)
>>> +	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
>>> +#else
>>> +	return true;
>>> +#endif
>>
>> In patch 02, You have the following, can you do the same for the above?
>> +#ifdef CONFIG_IP_MROUTE
>> +void ipmr_cache_free(struct mfc_cache *mfc_cache);
>> +#else
>> +static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
>> +{
>> +}
>> +#endif
> 
> I don't believe this is necessary. The solution you described is often
> used in headers. But here, I'm ok with the current code.
> 

+1

> 
>>
>>> +}
>>> +EXPORT_SYMBOL(ipmr_rule_default);
>>> +
>>>  static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
>>>  				const void *ptr)
>>>  {
>>>
>>

^ permalink raw reply

* [PATCH net v2] sctp: Fix a big endian bug in sctp_diag_dump()
From: Dan Carpenter @ 2017-09-25 10:19 UTC (permalink / raw)
  To: Vlad Yasevich, Xin Long
  Cc: Neil Horman, David S. Miller, linux-sctp, netdev, kernel-janitors
In-Reply-To: <CADvbK_dCT6Z6JwD+VtjNg6UUkQneU2OLeme9JVqrXjnJJ63cGg@mail.gmail.com>

The sctp_for_each_transport() function takes an pointer to int.  The
cb->args[] array holds longs so it's only using the high 32 bits.  It
works on little endian system but will break on big endian 64 bit
machines.

Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
---
v2: The v1 patch changed the function to take a long pointer, but v2
    just changes the caller.

diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 22ed01a76b19..a72a7d925d46 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -463,6 +463,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		.r = r,
 		.net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
 	};
+	int pos = cb->args[2];

 	/* eps hashtable dumps
 	 * args:
@@ -493,7 +494,8 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		goto done;

 	sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
-				net, (int *)&cb->args[2], &commp);
+				net, &pos, &commp);
+	cb->args[2] = pos;

 done:
 	cb->args[1] = cb->args[4];

^ permalink raw reply related

* [PATCH net-next 0/7] nfp: flower vxlan tunnel offload
From: Simon Horman @ 2017-09-25 10:23 UTC (permalink / raw)
  To: David Miller, Jakub Kicinski; +Cc: netdev, oss-drivers, Simon Horman

From: Simon Horman <simon.horman@netronome.com>

John says:

This patch set allows offloading of TC flower match and set tunnel fields
to the NFP. The initial focus is on VXLAN traffic. Due to the current
state of the NFP firmware, only VXLAN traffic on well known port 4789 is
handled. The match and action fields must explicity set this value to be
supported. Tunnel end point information is also offloaded to the NFP for
both encapsulation and decapsulation. The NFP expects 3 separate data sets
to be supplied.

For decapsulation, 2 separate lists exist; a list of MAC addresses
referenced by an index comprised of the port number, and a list of IP
addresses. These IP addresses are not connected to a MAC or port. The MAC
addresses can be written as a block or one at a time (because they have an
index, previous values can be overwritten) while the IP addresses are
always written as a list of all the available IPs. Because the MAC address
used as a tunnel end point may be associated with a physical port or may
be a virtual netdev like an OVS bridge, we do not know which addresses
should be offloaded. For this reason, all MAC addresses of active netdevs
are offloaded to the NFP. A notifier checks for changes to any currently
offloaded MACs or any new netdevs that may occur. For IP addresses, the
tunnel end point used in the rules is known as the destination IP address
must be specified in the flower classifier rule. When a new IP address
appears in a rule, the IP address is offloaded. The IP is removed from the
offloaded list when all rules matching on that IP are deleted.

For encapsulation, a next hop table is updated on the NFP that contains
the source/dest IPs, MACs and egress port. These are written individually
when requested. If the NFP tries to encapsulate a packet but does not know
the next hop, then is sends a request to the host. The host carries out a
route lookup and populates the given entry on the NFP table. A notifier
also exists to check for any links changing or going down in the kernel
next hop table. If an offloaded next hop entry is removed from the kernel
then it is also removed on the NFP.

The NFP periodically sends a message to the host telling it which tunnel
ports have packets egressing the system. The host uses this information to
update the used value in the neighbour entry. This means that, rather than
expire when it times out, the kernel will send an ARP to check if the link
is still live. From an NFP perspective, this means that valid entries will
not be removed from its next hop table.

John Hurley (7):
  nfp: add helper to get flower cmsg length
  nfp: compile flower vxlan tunnel metadata match fields
  nfp: compile flower vxlan tunnel set actions
  nfp: offload flower vxlan endpoint MAC addresses
  nfp: offload vxlan IPv4 endpoints of flower rules
  nfp: flower vxlan neighbour offload
  nfp: flower vxlan neighbour keep-alive

 drivers/net/ethernet/netronome/nfp/Makefile        |   3 +-
 drivers/net/ethernet/netronome/nfp/flower/action.c | 169 ++++-
 drivers/net/ethernet/netronome/nfp/flower/cmsg.c   |  16 +-
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |  87 ++-
 drivers/net/ethernet/netronome/nfp/flower/main.c   |  13 +
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  35 +
 drivers/net/ethernet/netronome/nfp/flower/match.c  |  75 +-
 .../net/ethernet/netronome/nfp/flower/metadata.c   |   2 +-
 .../net/ethernet/netronome/nfp/flower/offload.c    |  74 +-
 .../ethernet/netronome/nfp/flower/tunnel_conf.c    | 811 +++++++++++++++++++++
 10 files changed, 1243 insertions(+), 42 deletions(-)
 create mode 100644 drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c

-- 
2.1.4

^ permalink raw reply

* [PATCH net-next 1/7] nfp: add helper to get flower cmsg length
From: Simon Horman @ 2017-09-25 10:23 UTC (permalink / raw)
  To: David Miller, Jakub Kicinski
  Cc: netdev, oss-drivers, John Hurley, Simon Horman
In-Reply-To: <1506335021-32024-1-git-send-email-simon.horman@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Add a helper function that returns the length of the cmsg data when given
the cmsg skb

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h     | 5 +++++
 drivers/net/ethernet/netronome/nfp/flower/metadata.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index a2ec60344236..7a5ccf0cc7c2 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -323,6 +323,11 @@ static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
 	return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
 }
 
+static inline int nfp_flower_cmsg_get_data_len(struct sk_buff *skb)
+{
+	return skb->len - NFP_FLOWER_CMSG_HLEN;
+}
+
 struct sk_buff *
 nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports);
 void
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 3226ddc55f99..193520ef23f0 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -140,7 +140,7 @@ nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
 
 void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb)
 {
-	unsigned int msg_len = skb->len - NFP_FLOWER_CMSG_HLEN;
+	unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb);
 	struct nfp_fl_stats_frame *stats_frame;
 	unsigned char *msg;
 	int i;
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 2/7] nfp: compile flower vxlan tunnel metadata match fields
From: Simon Horman @ 2017-09-25 10:23 UTC (permalink / raw)
  To: David Miller, Jakub Kicinski
  Cc: netdev, oss-drivers, John Hurley, Simon Horman
In-Reply-To: <1506335021-32024-1-git-send-email-simon.horman@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Compile ovs-tc flower vxlan metadata match fields for offloading. Only
support offload of tunnel data when the VXLAN port specifically matches
well known port 4789.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   | 38 ++++++++++++
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  2 +
 drivers/net/ethernet/netronome/nfp/flower/match.c  | 60 +++++++++++++++++--
 .../net/ethernet/netronome/nfp/flower/offload.c    | 70 +++++++++++++++++++---
 4 files changed, 158 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 7a5ccf0cc7c2..af9165b3b652 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -83,6 +83,14 @@
 #define NFP_FL_PUSH_VLAN_CFI		BIT(12)
 #define NFP_FL_PUSH_VLAN_VID		GENMASK(11, 0)
 
+/* Tunnel ports */
+#define NFP_FL_PORT_TYPE_TUN		0x50000000
+
+enum nfp_flower_tun_type {
+	NFP_FL_TUNNEL_NONE =	0,
+	NFP_FL_TUNNEL_VXLAN =	2,
+};
+
 struct nfp_fl_output {
 	__be16 a_op;
 	__be16 flags;
@@ -230,6 +238,36 @@ struct nfp_flower_ipv6 {
 	struct in6_addr ipv6_dst;
 };
 
+/* Flow Frame VXLAN --> Tunnel details (4W/16B)
+ * -----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         ipv4_addr_src                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         ipv4_addr_dst                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |           tun_flags           |       tos     |       ttl     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   gpe_flags   |            Reserved           | Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                     VNI                       |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_vxlan {
+	__be32 ip_src;
+	__be32 ip_dst;
+	__be16 tun_flags;
+	u8 tos;
+	u8 ttl;
+	u8 gpe_flags;
+	u8 reserved[2];
+	u8 nxt_proto;
+	__be32 tun_id;
+};
+
+#define NFP_FL_TUN_VNI_OFFSET 8
+
 /* The base header for a control message packet.
  * Defines an 8-bit version, and an 8-bit type, padded
  * to a 32-bit word. Rest of the packet is type-specific.
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index c20dd00a1cae..cd695eabce02 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -58,6 +58,8 @@ struct nfp_app;
 #define NFP_FL_MASK_REUSE_TIME_NS	40000
 #define NFP_FL_MASK_ID_LOCATION		1
 
+#define NFP_FL_VXLAN_PORT		4789
+
 struct nfp_fl_mask_id {
 	struct circ_buf mask_id_free_list;
 	struct timespec64 *last_used;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index d25b5038c3a2..1fd1bab0611f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -77,14 +77,17 @@ nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type)
 
 static int
 nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
-			bool mask_version)
+			bool mask_version, enum nfp_flower_tun_type tun_type)
 {
 	if (mask_version) {
 		frame->in_port = cpu_to_be32(~0);
 		return 0;
 	}
 
-	frame->in_port = cpu_to_be32(cmsg_port);
+	if (tun_type)
+		frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
+	else
+		frame->in_port = cpu_to_be32(cmsg_port);
 
 	return 0;
 }
@@ -189,15 +192,53 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
 	}
 }
 
+static void
+nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame,
+			 struct tc_cls_flower_offload *flow,
+			 bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_ipv4_addrs *vxlan_ips;
+	struct flow_dissector_key_keyid *vni;
+
+	/* Wildcard TOS/TTL/GPE_FLAGS/NXT_PROTO for now. */
+	memset(frame, 0, sizeof(struct nfp_flower_vxlan));
+
+	if (dissector_uses_key(flow->dissector,
+			       FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		u32 temp_vni;
+
+		vni = skb_flow_dissector_target(flow->dissector,
+						FLOW_DISSECTOR_KEY_ENC_KEYID,
+						target);
+		temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET;
+		frame->tun_id = cpu_to_be32(temp_vni);
+	}
+
+	if (dissector_uses_key(flow->dissector,
+			       FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+		vxlan_ips =
+		   skb_flow_dissector_target(flow->dissector,
+					     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+					     target);
+		frame->ip_src = vxlan_ips->src;
+		frame->ip_dst = vxlan_ips->dst;
+	}
+}
+
 int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 				  struct nfp_fl_key_ls *key_ls,
 				  struct net_device *netdev,
 				  struct nfp_fl_payload *nfp_flow)
 {
+	enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
 	int err;
 	u8 *ext;
 	u8 *msk;
 
+	if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN)
+		tun_type = NFP_FL_TUNNEL_VXLAN;
+
 	memset(nfp_flow->unmasked_data, 0, key_ls->key_size);
 	memset(nfp_flow->mask_data, 0, key_ls->key_size);
 
@@ -216,14 +257,14 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 		/* Populate Exact Port data. */
 		err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
 					      nfp_repr_get_port_id(netdev),
-					      false);
+					      false, tun_type);
 		if (err)
 			return err;
 
 		/* Populate Mask Port Data. */
 		err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
 					      nfp_repr_get_port_id(netdev),
-					      true);
+					      true, tun_type);
 		if (err)
 			return err;
 
@@ -291,5 +332,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 		msk += sizeof(struct nfp_flower_ipv6);
 	}
 
+	if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) {
+		/* Populate Exact VXLAN Data. */
+		nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext,
+					 flow, false);
+		/* Populate Mask VXLAN Data. */
+		nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk,
+					 flow, true);
+		ext += sizeof(struct nfp_flower_vxlan);
+		msk += sizeof(struct nfp_flower_vxlan);
+	}
+
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index a18b4d2b1d3e..637372ba8f55 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -52,8 +52,25 @@
 	 BIT(FLOW_DISSECTOR_KEY_PORTS) | \
 	 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
 	 BIT(FLOW_DISSECTOR_KEY_VLAN) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
 	 BIT(FLOW_DISSECTOR_KEY_IP))
 
+#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \
+	(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+
+#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \
+	(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+
 static int
 nfp_flower_xmit_flow(struct net_device *netdev,
 		     struct nfp_fl_payload *nfp_flow, u8 mtype)
@@ -125,15 +142,58 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
 	if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
 		return -EOPNOTSUPP;
 
+	/* If any tun dissector is used then the required set must be used. */
+	if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR &&
+	    (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
+	    != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
+		return -EOPNOTSUPP;
+
+	key_layer_two = 0;
+	key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
+	key_size = sizeof(struct nfp_flower_meta_one) +
+		   sizeof(struct nfp_flower_in_port) +
+		   sizeof(struct nfp_flower_mac_mpls);
+
 	if (dissector_uses_key(flow->dissector,
 			       FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
+		struct flow_dissector_key_ports *mask_enc_ports = NULL;
+		struct flow_dissector_key_ports *enc_ports = NULL;
 		struct flow_dissector_key_control *mask_enc_ctl =
 			skb_flow_dissector_target(flow->dissector,
 						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
 						  flow->mask);
-		/* We are expecting a tunnel. For now we ignore offloading. */
-		if (mask_enc_ctl->addr_type)
+		struct flow_dissector_key_control *enc_ctl =
+			skb_flow_dissector_target(flow->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
+						  flow->key);
+		if (mask_enc_ctl->addr_type != 0xffff ||
+		    enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS)
 			return -EOPNOTSUPP;
+
+		/* These fields are already verified as used. */
+		mask_ipv4 =
+			skb_flow_dissector_target(flow->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+						  flow->mask);
+		if (mask_ipv4->dst != cpu_to_be32(~0))
+			return -EOPNOTSUPP;
+
+		mask_enc_ports =
+			skb_flow_dissector_target(flow->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_PORTS,
+						  flow->mask);
+		enc_ports =
+			skb_flow_dissector_target(flow->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_PORTS,
+						  flow->key);
+
+		if (mask_enc_ports->dst != cpu_to_be16(~0) ||
+		    enc_ports->dst != htons(NFP_FL_VXLAN_PORT))
+			return -EOPNOTSUPP;
+
+		key_layer |= NFP_FLOWER_LAYER_VXLAN;
+		key_size += sizeof(struct nfp_flower_vxlan);
 	}
 
 	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
@@ -151,12 +211,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
 						    FLOW_DISSECTOR_KEY_IP,
 						    flow->mask);
 
-	key_layer_two = 0;
-	key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
-	key_size = sizeof(struct nfp_flower_meta_one) +
-		   sizeof(struct nfp_flower_in_port) +
-		   sizeof(struct nfp_flower_mac_mpls);
-
 	if (mask_basic && mask_basic->n_proto) {
 		/* Ethernet type is present in the key. */
 		switch (key_basic->n_proto) {
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 3/7] nfp: compile flower vxlan tunnel set actions
From: Simon Horman @ 2017-09-25 10:23 UTC (permalink / raw)
  To: David Miller, Jakub Kicinski
  Cc: netdev, oss-drivers, John Hurley, Simon Horman
In-Reply-To: <1506335021-32024-1-git-send-email-simon.horman@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Compile set tunnel actions for tc flower. Only support VXLAN and ensure a
tunnel destination port of 4789 is used.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/action.c | 169 ++++++++++++++++++---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |  31 +++-
 2 files changed, 179 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index db9750695dc7..38f3835ae176 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -37,6 +37,7 @@
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
 
 #include "cmsg.h"
 #include "main.h"
@@ -80,14 +81,27 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
 	push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
 }
 
+static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev,
+					 enum nfp_flower_tun_type tun_type)
+{
+	if (!out_dev->rtnl_link_ops)
+		return false;
+
+	if (!strcmp(out_dev->rtnl_link_ops->kind, "vxlan"))
+		return tun_type == NFP_FL_TUNNEL_VXLAN;
+
+	return false;
+}
+
 static int
 nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
 	      struct nfp_fl_payload *nfp_flow, bool last,
-	      struct net_device *in_dev)
+	      struct net_device *in_dev, enum nfp_flower_tun_type tun_type,
+	      int *tun_out_cnt)
 {
 	size_t act_size = sizeof(struct nfp_fl_output);
+	u16 tmp_output_op, tmp_flags;
 	struct net_device *out_dev;
-	u16 tmp_output_op;
 	int ifindex;
 
 	/* Set action opcode to output action. */
@@ -97,25 +111,114 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
 
 	output->a_op = cpu_to_be16(tmp_output_op);
 
-	/* Set action output parameters. */
-	output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0);
-
 	ifindex = tcf_mirred_ifindex(action);
 	out_dev = __dev_get_by_index(dev_net(in_dev), ifindex);
 	if (!out_dev)
 		return -EOPNOTSUPP;
 
-	/* Only offload egress ports are on the same device as the ingress
-	 * port.
+	tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0;
+
+	if (tun_type) {
+		/* Verify the egress netdev matches the tunnel type. */
+		if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type))
+			return -EOPNOTSUPP;
+
+		if (*tun_out_cnt)
+			return -EOPNOTSUPP;
+		(*tun_out_cnt)++;
+
+		output->flags = cpu_to_be16(tmp_flags |
+					    NFP_FL_OUT_FLAGS_USE_TUN);
+		output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
+	} else {
+		/* Set action output parameters. */
+		output->flags = cpu_to_be16(tmp_flags);
+
+		/* Only offload if egress ports are on the same device as the
+		 * ingress port.
+		 */
+		if (!switchdev_port_same_parent_id(in_dev, out_dev))
+			return -EOPNOTSUPP;
+
+		output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
+		if (!output->port)
+			return -EOPNOTSUPP;
+	}
+	nfp_flow->meta.shortcut = output->port;
+
+	return 0;
+}
+
+static bool nfp_fl_supported_tun_port(const struct tc_action *action)
+{
+	struct ip_tunnel_info *tun = tcf_tunnel_info(action);
+
+	return tun->key.tp_dst == htons(NFP_FL_VXLAN_PORT);
+}
+
+static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)
+{
+	size_t act_size = sizeof(struct nfp_fl_pre_tunnel);
+	struct nfp_fl_pre_tunnel *pre_tun_act;
+	u16 tmp_pre_tun_op;
+
+	/* Pre_tunnel action must be first on action list.
+	 * If other actions already exist they need pushed forward.
 	 */
-	if (!switchdev_port_same_parent_id(in_dev, out_dev))
-		return -EOPNOTSUPP;
+	if (act_len)
+		memmove(act_data + act_size, act_data, act_len);
+
+	pre_tun_act = (struct nfp_fl_pre_tunnel *)act_data;
+
+	memset(pre_tun_act, 0, act_size);
+
+	tmp_pre_tun_op =
+		FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
+		FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PRE_TUNNEL);
+
+	pre_tun_act->a_op = cpu_to_be16(tmp_pre_tun_op);
 
-	output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
-	if (!output->port)
+	return pre_tun_act;
+}
+
+static int
+nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan,
+		 const struct tc_action *action,
+		 struct nfp_fl_pre_tunnel *pre_tun)
+{
+	struct ip_tunnel_info *vxlan = tcf_tunnel_info(action);
+	size_t act_size = sizeof(struct nfp_fl_set_vxlan);
+	u32 tmp_set_vxlan_type_index = 0;
+	u16 tmp_set_vxlan_op;
+	/* Currently support one pre-tunnel so index is always 0. */
+	int pretun_idx = 0;
+
+	if (vxlan->options_len) {
+		/* Do not support options e.g. vxlan gpe. */
 		return -EOPNOTSUPP;
+	}
 
-	nfp_flow->meta.shortcut = output->port;
+	tmp_set_vxlan_op =
+		FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
+		FIELD_PREP(NFP_FL_ACT_JMP_ID,
+			   NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL);
+
+	set_vxlan->a_op = cpu_to_be16(tmp_set_vxlan_op);
+
+	/* Set tunnel type and pre-tunnel index. */
+	tmp_set_vxlan_type_index |=
+		FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, NFP_FL_TUNNEL_VXLAN) |
+		FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx);
+
+	set_vxlan->tun_type_index = cpu_to_be32(tmp_set_vxlan_type_index);
+
+	set_vxlan->tun_id = vxlan->key.tun_id;
+	set_vxlan->tun_flags = vxlan->key.tun_flags;
+	set_vxlan->ipv4_ttl = vxlan->key.ttl;
+	set_vxlan->ipv4_tos = vxlan->key.tos;
+
+	/* Complete pre_tunnel action. */
+	pre_tun->ipv4_dst = vxlan->key.u.ipv4.dst;
 
 	return 0;
 }
@@ -123,8 +226,11 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
 static int
 nfp_flower_loop_action(const struct tc_action *a,
 		       struct nfp_fl_payload *nfp_fl, int *a_len,
-		       struct net_device *netdev)
+		       struct net_device *netdev,
+		       enum nfp_flower_tun_type *tun_type, int *tun_out_cnt)
 {
+	struct nfp_fl_pre_tunnel *pre_tun;
+	struct nfp_fl_set_vxlan *s_vxl;
 	struct nfp_fl_push_vlan *psh_v;
 	struct nfp_fl_pop_vlan *pop_v;
 	struct nfp_fl_output *output;
@@ -137,7 +243,8 @@ nfp_flower_loop_action(const struct tc_action *a,
 			return -EOPNOTSUPP;
 
 		output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
-		err = nfp_fl_output(output, a, nfp_fl, true, netdev);
+		err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type,
+				    tun_out_cnt);
 		if (err)
 			return err;
 
@@ -147,7 +254,8 @@ nfp_flower_loop_action(const struct tc_action *a,
 			return -EOPNOTSUPP;
 
 		output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
-		err = nfp_fl_output(output, a, nfp_fl, false, netdev);
+		err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type,
+				    tun_out_cnt);
 		if (err)
 			return err;
 
@@ -170,6 +278,29 @@ nfp_flower_loop_action(const struct tc_action *a,
 
 		nfp_fl_push_vlan(psh_v, a);
 		*a_len += sizeof(struct nfp_fl_push_vlan);
+	} else if (is_tcf_tunnel_set(a) && nfp_fl_supported_tun_port(a)) {
+		/* Pre-tunnel action is required for tunnel encap.
+		 * This checks for next hop entries on NFP.
+		 * If none, the packet falls back before applying other actions.
+		 */
+		if (*a_len + sizeof(struct nfp_fl_pre_tunnel) +
+		    sizeof(struct nfp_fl_set_vxlan) > NFP_FL_MAX_A_SIZ)
+			return -EOPNOTSUPP;
+
+		*tun_type = NFP_FL_TUNNEL_VXLAN;
+		pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len);
+		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+		*a_len += sizeof(struct nfp_fl_pre_tunnel);
+
+		s_vxl = (struct nfp_fl_set_vxlan *)&nfp_fl->action_data[*a_len];
+		err = nfp_fl_set_vxlan(s_vxl, a, pre_tun);
+		if (err)
+			return err;
+
+		*a_len += sizeof(struct nfp_fl_set_vxlan);
+	} else if (is_tcf_tunnel_release(a)) {
+		/* Tunnel decap is handled by default so accept action. */
+		return 0;
 	} else {
 		/* Currently we do not handle any other actions. */
 		return -EOPNOTSUPP;
@@ -182,18 +313,22 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
 			      struct net_device *netdev,
 			      struct nfp_fl_payload *nfp_flow)
 {
-	int act_len, act_cnt, err;
+	int act_len, act_cnt, err, tun_out_cnt;
+	enum nfp_flower_tun_type tun_type;
 	const struct tc_action *a;
 	LIST_HEAD(actions);
 
 	memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
 	nfp_flow->meta.act_len = 0;
+	tun_type = NFP_FL_TUNNEL_NONE;
 	act_len = 0;
 	act_cnt = 0;
+	tun_out_cnt = 0;
 
 	tcf_exts_to_list(flow->exts, &actions);
 	list_for_each_entry(a, &actions, list) {
-		err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev);
+		err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev,
+					     &tun_type, &tun_out_cnt);
 		if (err)
 			return err;
 		act_cnt++;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index af9165b3b652..ff42ce8a1e9c 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -67,10 +67,12 @@
 #define NFP_FL_LW_SIZ			2
 
 /* Action opcodes */
-#define NFP_FL_ACTION_OPCODE_OUTPUT	0
-#define NFP_FL_ACTION_OPCODE_PUSH_VLAN	1
-#define NFP_FL_ACTION_OPCODE_POP_VLAN	2
-#define NFP_FL_ACTION_OPCODE_NUM	32
+#define NFP_FL_ACTION_OPCODE_OUTPUT		0
+#define NFP_FL_ACTION_OPCODE_PUSH_VLAN		1
+#define NFP_FL_ACTION_OPCODE_POP_VLAN		2
+#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL	6
+#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL		17
+#define NFP_FL_ACTION_OPCODE_NUM		32
 
 #define NFP_FL_ACT_JMP_ID		GENMASK(15, 8)
 #define NFP_FL_ACT_LEN_LW		GENMASK(7, 0)
@@ -85,6 +87,8 @@
 
 /* Tunnel ports */
 #define NFP_FL_PORT_TYPE_TUN		0x50000000
+#define NFP_FL_IPV4_TUNNEL_TYPE		GENMASK(7, 4)
+#define NFP_FL_IPV4_PRE_TUN_INDEX	GENMASK(2, 0)
 
 enum nfp_flower_tun_type {
 	NFP_FL_TUNNEL_NONE =	0,
@@ -123,6 +127,25 @@ struct nfp_flower_meta_one {
 	u16 reserved;
 };
 
+struct nfp_fl_pre_tunnel {
+	__be16 a_op;
+	__be16 reserved;
+	__be32 ipv4_dst;
+	/* reserved for use with IPv6 addresses */
+	__be32 extra[3];
+};
+
+struct nfp_fl_set_vxlan {
+	__be16 a_op;
+	__be16 reserved;
+	__be64 tun_id;
+	__be32 tun_type_index;
+	__be16 tun_flags;
+	u8 ipv4_ttl;
+	u8 ipv4_tos;
+	__be32 extra[2];
+} __packed;
+
 /* Metadata with L2 (1W/4B)
  * ----------------------------------------------------------------
  *    3                   2                   1
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 4/7] nfp: offload flower vxlan endpoint MAC addresses
From: Simon Horman @ 2017-09-25 10:23 UTC (permalink / raw)
  To: David Miller, Jakub Kicinski
  Cc: netdev, oss-drivers, John Hurley, Simon Horman
In-Reply-To: <1506335021-32024-1-git-send-email-simon.horman@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Generate a list of MAC addresses of netdevs that could be used as VXLAN
tunnel end points. Give offloaded MACs an index for storage on the NFP in
the ranges:
0x100-0x1ff physical port representors
0x200-0x2ff VF port representors
0x300-0x3ff other offloads (e.g. vxlan netdevs, ovs bridges)

Assign phys and vf indexes based on unique 8 bit values in the port num.
Maintain list of other netdevs to ensure same netdev is not offloaded
twice and each gets a unique ID without exhausting the entries. Because
the IDs are unique but constant for a netdev, any changes are implemented
by overwriting the index on NFP.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/Makefile        |   3 +-
 drivers/net/ethernet/netronome/nfp/flower/cmsg.c   |   7 -
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |   9 +
 drivers/net/ethernet/netronome/nfp/flower/main.c   |  13 +
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  18 +
 drivers/net/ethernet/netronome/nfp/flower/match.c  |   7 +
 .../ethernet/netronome/nfp/flower/tunnel_conf.c    | 374 +++++++++++++++++++++
 7 files changed, 423 insertions(+), 8 deletions(-)
 create mode 100644 drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c

diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 96e579a15cbe..becaacf1554d 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -37,7 +37,8 @@ nfp-objs += \
 	    flower/main.o \
 	    flower/match.o \
 	    flower/metadata.o \
-	    flower/offload.o
+	    flower/offload.o \
+	    flower/tunnel_conf.o
 endif
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index c3ca05d10fe1..b756006dba6f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -38,17 +38,10 @@
 #include <net/dst_metadata.h>
 
 #include "main.h"
-#include "../nfpcore/nfp_cpp.h"
 #include "../nfp_net.h"
 #include "../nfp_net_repr.h"
 #include "./cmsg.h"
 
-#define nfp_flower_cmsg_warn(app, fmt, args...)				\
-	do {								\
-		if (net_ratelimit())					\
-			nfp_warn((app)->cpp, fmt, ## args);		\
-	} while (0)
-
 static struct nfp_flower_cmsg_hdr *
 nfp_flower_cmsg_get_hdr(struct sk_buff *skb)
 {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index ff42ce8a1e9c..dc248193c996 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -39,6 +39,7 @@
 #include <linux/types.h>
 
 #include "../nfp_app.h"
+#include "../nfpcore/nfp_cpp.h"
 
 #define NFP_FLOWER_LAYER_META		BIT(0)
 #define NFP_FLOWER_LAYER_PORT		BIT(1)
@@ -90,6 +91,12 @@
 #define NFP_FL_IPV4_TUNNEL_TYPE		GENMASK(7, 4)
 #define NFP_FL_IPV4_PRE_TUN_INDEX	GENMASK(2, 0)
 
+#define nfp_flower_cmsg_warn(app, fmt, args...)                         \
+	do {                                                            \
+		if (net_ratelimit())                                    \
+			nfp_warn((app)->cpp, fmt, ## args);             \
+	} while (0)
+
 enum nfp_flower_tun_type {
 	NFP_FL_TUNNEL_NONE =	0,
 	NFP_FL_TUNNEL_VXLAN =	2,
@@ -310,6 +317,7 @@ enum nfp_flower_cmsg_type_port {
 	NFP_FLOWER_CMSG_TYPE_FLOW_DEL =		2,
 	NFP_FLOWER_CMSG_TYPE_MAC_REPR =		7,
 	NFP_FLOWER_CMSG_TYPE_PORT_MOD =		8,
+	NFP_FLOWER_CMSG_TYPE_TUN_MAC =		11,
 	NFP_FLOWER_CMSG_TYPE_FLOW_STATS =	15,
 	NFP_FLOWER_CMSG_TYPE_PORT_ECHO =	16,
 	NFP_FLOWER_CMSG_TYPE_MAX =		32,
@@ -343,6 +351,7 @@ enum nfp_flower_cmsg_port_type {
 	NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC =	0x0,
 	NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT =	0x1,
 	NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT =	0x2,
+	NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT =  0x3,
 };
 
 enum nfp_flower_cmsg_port_vnic_type {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 91fe03617106..e46e7c60d491 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -436,6 +436,16 @@ static void nfp_flower_clean(struct nfp_app *app)
 	app->priv = NULL;
 }
 
+static int nfp_flower_start(struct nfp_app *app)
+{
+	return nfp_tunnel_config_start(app);
+}
+
+static void nfp_flower_stop(struct nfp_app *app)
+{
+	nfp_tunnel_config_stop(app);
+}
+
 const struct nfp_app_type app_flower = {
 	.id		= NFP_APP_FLOWER_NIC,
 	.name		= "flower",
@@ -453,6 +463,9 @@ const struct nfp_app_type app_flower = {
 	.repr_open	= nfp_flower_repr_netdev_open,
 	.repr_stop	= nfp_flower_repr_netdev_stop,
 
+	.start		= nfp_flower_start,
+	.stop		= nfp_flower_stop,
+
 	.ctrl_msg_rx	= nfp_flower_cmsg_rx,
 
 	.sriov_enable	= nfp_flower_sriov_enable,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index cd695eabce02..9de375acc254 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -84,6 +84,13 @@ struct nfp_fl_stats_id {
  * @flow_table:		Hash table used to store flower rules
  * @cmsg_work:		Workqueue for control messages processing
  * @cmsg_skbs:		List of skbs for control message processing
+ * @nfp_mac_off_list:	List of MAC addresses to offload
+ * @nfp_mac_index_list:	List of unique 8-bit indexes for non NFP netdevs
+ * @nfp_mac_off_lock:	Lock for the MAC address list
+ * @nfp_mac_index_lock:	Lock for the MAC index list
+ * @nfp_mac_off_ids:	IDA to manage id assignment for offloaded macs
+ * @nfp_mac_off_count:	Number of MACs in address list
+ * @nfp_tun_mac_nb:	Notifier to monitor link state
  */
 struct nfp_flower_priv {
 	struct nfp_app *app;
@@ -96,6 +103,13 @@ struct nfp_flower_priv {
 	DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS);
 	struct work_struct cmsg_work;
 	struct sk_buff_head cmsg_skbs;
+	struct list_head nfp_mac_off_list;
+	struct list_head nfp_mac_index_list;
+	struct mutex nfp_mac_off_lock;
+	struct mutex nfp_mac_index_lock;
+	struct ida nfp_mac_off_ids;
+	int nfp_mac_off_count;
+	struct notifier_block nfp_tun_mac_nb;
 };
 
 struct nfp_fl_key_ls {
@@ -165,4 +179,8 @@ nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
 
 void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb);
 
+int nfp_tunnel_config_start(struct nfp_app *app);
+void nfp_tunnel_config_stop(struct nfp_app *app);
+void nfp_tunnel_write_macs(struct nfp_app *app);
+
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 1fd1bab0611f..cb3ff6c126e8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -232,6 +232,7 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 				  struct nfp_fl_payload *nfp_flow)
 {
 	enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
+	struct nfp_repr *netdev_repr;
 	int err;
 	u8 *ext;
 	u8 *msk;
@@ -341,6 +342,12 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 					 flow, true);
 		ext += sizeof(struct nfp_flower_vxlan);
 		msk += sizeof(struct nfp_flower_vxlan);
+
+		/* Configure tunnel end point MAC. */
+		if (nfp_netdev_is_nfp_repr(netdev)) {
+			netdev_repr = netdev_priv(netdev);
+			nfp_tunnel_write_macs(netdev_repr->app);
+		}
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
new file mode 100644
index 000000000000..34be85803020
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include <net/dst_metadata.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_net_repr.h"
+#include "../nfp_net.h"
+
+/**
+ * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP
+ * @reserved:	reserved for future use
+ * @count:	number of MAC addresses in the message
+ * @index:	index of MAC address in the lookup table
+ * @addr:	interface MAC address
+ * @addresses:	series of MACs to offload
+ */
+struct nfp_tun_mac_addr {
+	__be16 reserved;
+	__be16 count;
+	struct index_mac_addr {
+		__be16 index;
+		u8 addr[ETH_ALEN];
+	} addresses[];
+};
+
+/**
+ * struct nfp_tun_mac_offload_entry - list of MACs to offload
+ * @index:	index of MAC address for offloading
+ * @addr:	interface MAC address
+ * @list:	list pointer
+ */
+struct nfp_tun_mac_offload_entry {
+	__be16 index;
+	u8 addr[ETH_ALEN];
+	struct list_head list;
+};
+
+#define NFP_MAX_MAC_INDEX       0xff
+
+/**
+ * struct nfp_tun_mac_non_nfp_idx - converts non NFP netdev ifindex to 8-bit id
+ * @ifindex:	netdev ifindex of the device
+ * @index:	index of netdevs mac on NFP
+ * @list:	list pointer
+ */
+struct nfp_tun_mac_non_nfp_idx {
+	int ifindex;
+	u8 index;
+	struct list_head list;
+};
+
+static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev)
+{
+	if (!netdev->rtnl_link_ops)
+		return false;
+	if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch"))
+		return true;
+	if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan"))
+		return true;
+
+	return false;
+}
+
+static int
+nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata)
+{
+	struct sk_buff *skb;
+	unsigned char *msg;
+
+	skb = nfp_flower_cmsg_alloc(app, plen, mtype);
+	if (!skb)
+		return -ENOMEM;
+
+	msg = nfp_flower_cmsg_get_data(skb);
+	memcpy(msg, pdata, nfp_flower_cmsg_get_data_len(skb));
+
+	nfp_ctrl_tx(app->ctrl, skb);
+	return 0;
+}
+
+void nfp_tunnel_write_macs(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_tun_mac_offload_entry *entry;
+	struct nfp_tun_mac_addr *payload;
+	struct list_head *ptr, *storage;
+	int mac_count, err, pay_size;
+
+	mutex_lock(&priv->nfp_mac_off_lock);
+	if (!priv->nfp_mac_off_count) {
+		mutex_unlock(&priv->nfp_mac_off_lock);
+		return;
+	}
+
+	pay_size = sizeof(struct nfp_tun_mac_addr) +
+		   sizeof(struct index_mac_addr) * priv->nfp_mac_off_count;
+
+	payload = kzalloc(pay_size, GFP_KERNEL);
+	if (!payload) {
+		mutex_unlock(&priv->nfp_mac_off_lock);
+		return;
+	}
+
+	payload->count = cpu_to_be16(priv->nfp_mac_off_count);
+
+	mac_count = 0;
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
+		entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
+				   list);
+		payload->addresses[mac_count].index = entry->index;
+		ether_addr_copy(payload->addresses[mac_count].addr,
+				entry->addr);
+		mac_count++;
+	}
+
+	err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC,
+				       pay_size, payload);
+
+	kfree(payload);
+
+	if (err) {
+		mutex_unlock(&priv->nfp_mac_off_lock);
+		/* Write failed so retain list for future retry. */
+		return;
+	}
+
+	/* If list was successfully offloaded, flush it. */
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
+		entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
+				   list);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+
+	priv->nfp_mac_off_count = 0;
+	mutex_unlock(&priv->nfp_mac_off_lock);
+}
+
+static int nfp_tun_get_mac_idx(struct nfp_app *app, int ifindex)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_tun_mac_non_nfp_idx *entry;
+	struct list_head *ptr, *storage;
+	int idx;
+
+	mutex_lock(&priv->nfp_mac_index_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
+		entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list);
+		if (entry->ifindex == ifindex) {
+			idx = entry->index;
+			mutex_unlock(&priv->nfp_mac_index_lock);
+			return idx;
+		}
+	}
+
+	idx = ida_simple_get(&priv->nfp_mac_off_ids, 0,
+			     NFP_MAX_MAC_INDEX, GFP_KERNEL);
+	if (idx < 0) {
+		mutex_unlock(&priv->nfp_mac_index_lock);
+		return idx;
+	}
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		mutex_unlock(&priv->nfp_mac_index_lock);
+		return -ENOMEM;
+	}
+	entry->ifindex = ifindex;
+	entry->index = idx;
+	list_add_tail(&entry->list, &priv->nfp_mac_index_list);
+	mutex_unlock(&priv->nfp_mac_index_lock);
+
+	return idx;
+}
+
+static void nfp_tun_del_mac_idx(struct nfp_app *app, int ifindex)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_tun_mac_non_nfp_idx *entry;
+	struct list_head *ptr, *storage;
+
+	mutex_lock(&priv->nfp_mac_index_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
+		entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list);
+		if (entry->ifindex == ifindex) {
+			ida_simple_remove(&priv->nfp_mac_off_ids,
+					  entry->index);
+			list_del(&entry->list);
+			kfree(entry);
+			break;
+		}
+	}
+	mutex_unlock(&priv->nfp_mac_index_lock);
+}
+
+static void nfp_tun_add_to_mac_offload_list(struct net_device *netdev,
+					    struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_tun_mac_offload_entry *entry;
+	u16 nfp_mac_idx;
+	int port = 0;
+
+	/* Check if MAC should be offloaded. */
+	if (!is_valid_ether_addr(netdev->dev_addr))
+		return;
+
+	if (nfp_netdev_is_nfp_repr(netdev))
+		port = nfp_repr_get_port_id(netdev);
+	else if (!nfp_tun_is_netdev_to_offload(netdev))
+		return;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		nfp_flower_cmsg_warn(app, "Mem fail when offloading MAC.\n");
+		return;
+	}
+
+	if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
+	    NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) {
+		nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT;
+	} else if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
+		   NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT) {
+		port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port);
+		nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT;
+	} else {
+		/* Must assign our own unique 8-bit index. */
+		int idx = nfp_tun_get_mac_idx(app, netdev->ifindex);
+
+		if (idx < 0) {
+			nfp_flower_cmsg_warn(app, "Can't assign non-repr MAC index.\n");
+			kfree(entry);
+			return;
+		}
+		nfp_mac_idx = idx << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT;
+	}
+
+	entry->index = cpu_to_be16(nfp_mac_idx);
+	ether_addr_copy(entry->addr, netdev->dev_addr);
+
+	mutex_lock(&priv->nfp_mac_off_lock);
+	priv->nfp_mac_off_count++;
+	list_add_tail(&entry->list, &priv->nfp_mac_off_list);
+	mutex_unlock(&priv->nfp_mac_off_lock);
+}
+
+static int nfp_tun_mac_event_handler(struct notifier_block *nb,
+				     unsigned long event, void *ptr)
+{
+	struct nfp_flower_priv *app_priv;
+	struct net_device *netdev;
+	struct nfp_app *app;
+
+	if (event == NETDEV_DOWN || event == NETDEV_UNREGISTER) {
+		app_priv = container_of(nb, struct nfp_flower_priv,
+					nfp_tun_mac_nb);
+		app = app_priv->app;
+		netdev = netdev_notifier_info_to_dev(ptr);
+
+		/* If non-nfp netdev then free its offload index. */
+		if (nfp_tun_is_netdev_to_offload(netdev))
+			nfp_tun_del_mac_idx(app, netdev->ifindex);
+	} else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR ||
+		   event == NETDEV_REGISTER) {
+		app_priv = container_of(nb, struct nfp_flower_priv,
+					nfp_tun_mac_nb);
+		app = app_priv->app;
+		netdev = netdev_notifier_info_to_dev(ptr);
+
+		nfp_tun_add_to_mac_offload_list(netdev, app);
+
+		/* Force a list write to keep NFP up to date. */
+		nfp_tunnel_write_macs(app);
+	}
+	return NOTIFY_OK;
+}
+
+int nfp_tunnel_config_start(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct net_device *netdev;
+	int err;
+
+	/* Initialise priv data for MAC offloading. */
+	priv->nfp_mac_off_count = 0;
+	mutex_init(&priv->nfp_mac_off_lock);
+	INIT_LIST_HEAD(&priv->nfp_mac_off_list);
+	priv->nfp_tun_mac_nb.notifier_call = nfp_tun_mac_event_handler;
+	mutex_init(&priv->nfp_mac_index_lock);
+	INIT_LIST_HEAD(&priv->nfp_mac_index_list);
+	ida_init(&priv->nfp_mac_off_ids);
+
+	err = register_netdevice_notifier(&priv->nfp_tun_mac_nb);
+	if (err)
+		goto err_free_mac_ida;
+
+	/* Parse netdevs already registered for MACs that need offloaded. */
+	rtnl_lock();
+	for_each_netdev(&init_net, netdev)
+		nfp_tun_add_to_mac_offload_list(netdev, app);
+	rtnl_unlock();
+
+	return 0;
+
+err_free_mac_ida:
+	ida_destroy(&priv->nfp_mac_off_ids);
+	return err;
+}
+
+void nfp_tunnel_config_stop(struct nfp_app *app)
+{
+	struct nfp_tun_mac_offload_entry *mac_entry;
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_tun_mac_non_nfp_idx *mac_idx;
+	struct list_head *ptr, *storage;
+
+	unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
+
+	/* Free any memory that may be occupied by MAC list. */
+	mutex_lock(&priv->nfp_mac_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
+		mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
+				       list);
+		list_del(&mac_entry->list);
+		kfree(mac_entry);
+	}
+	mutex_unlock(&priv->nfp_mac_off_lock);
+
+	/* Free any memory that may be occupied by MAC index list. */
+	mutex_lock(&priv->nfp_mac_index_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
+		mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx,
+				     list);
+		list_del(&mac_idx->list);
+		kfree(mac_idx);
+	}
+	mutex_unlock(&priv->nfp_mac_index_lock);
+
+	ida_destroy(&priv->nfp_mac_off_ids);
+}
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 5/7] nfp: offload vxlan IPv4 endpoints of flower rules
From: Simon Horman @ 2017-09-25 10:23 UTC (permalink / raw)
  To: David Miller, Jakub Kicinski
  Cc: netdev, oss-drivers, John Hurley, Simon Horman
In-Reply-To: <1506335021-32024-1-git-send-email-simon.horman@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Maintain a list of IPv4 addresses used as the tunnel destination IP match
fields in currently active flower rules. Offload the entire list of
NFP_FL_IPV4_ADDRS_MAX (even if some are unused) when new IPs are added or
removed. The NFP should only be aware of tunnel end points that are
currently used by rules on the device

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |   1 +
 drivers/net/ethernet/netronome/nfp/flower/main.h   |   7 ++
 drivers/net/ethernet/netronome/nfp/flower/match.c  |  14 ++-
 .../net/ethernet/netronome/nfp/flower/offload.c    |   4 +
 .../ethernet/netronome/nfp/flower/tunnel_conf.c    | 120 +++++++++++++++++++++
 5 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index dc248193c996..6540bb1ceefb 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -318,6 +318,7 @@ enum nfp_flower_cmsg_type_port {
 	NFP_FLOWER_CMSG_TYPE_MAC_REPR =		7,
 	NFP_FLOWER_CMSG_TYPE_PORT_MOD =		8,
 	NFP_FLOWER_CMSG_TYPE_TUN_MAC =		11,
+	NFP_FLOWER_CMSG_TYPE_TUN_IPS =		14,
 	NFP_FLOWER_CMSG_TYPE_FLOW_STATS =	15,
 	NFP_FLOWER_CMSG_TYPE_PORT_ECHO =	16,
 	NFP_FLOWER_CMSG_TYPE_MAX =		32,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 9de375acc254..53306af6cfe8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -86,8 +86,10 @@ struct nfp_fl_stats_id {
  * @cmsg_skbs:		List of skbs for control message processing
  * @nfp_mac_off_list:	List of MAC addresses to offload
  * @nfp_mac_index_list:	List of unique 8-bit indexes for non NFP netdevs
+ * @nfp_ipv4_off_list:	List of IPv4 addresses to offload
  * @nfp_mac_off_lock:	Lock for the MAC address list
  * @nfp_mac_index_lock:	Lock for the MAC index list
+ * @nfp_ipv4_off_lock:	Lock for the IPv4 address list
  * @nfp_mac_off_ids:	IDA to manage id assignment for offloaded macs
  * @nfp_mac_off_count:	Number of MACs in address list
  * @nfp_tun_mac_nb:	Notifier to monitor link state
@@ -105,8 +107,10 @@ struct nfp_flower_priv {
 	struct sk_buff_head cmsg_skbs;
 	struct list_head nfp_mac_off_list;
 	struct list_head nfp_mac_index_list;
+	struct list_head nfp_ipv4_off_list;
 	struct mutex nfp_mac_off_lock;
 	struct mutex nfp_mac_index_lock;
+	struct mutex nfp_ipv4_off_lock;
 	struct ida nfp_mac_off_ids;
 	int nfp_mac_off_count;
 	struct notifier_block nfp_tun_mac_nb;
@@ -142,6 +146,7 @@ struct nfp_fl_payload {
 	struct rcu_head rcu;
 	spinlock_t lock; /* lock stats */
 	struct nfp_fl_stats stats;
+	__be32 nfp_tun_ipv4_addr;
 	char *unmasked_data;
 	char *mask_data;
 	char *action_data;
@@ -182,5 +187,7 @@ void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb);
 int nfp_tunnel_config_start(struct nfp_app *app);
 void nfp_tunnel_config_stop(struct nfp_app *app);
 void nfp_tunnel_write_macs(struct nfp_app *app);
+void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4);
+void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4);
 
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index cb3ff6c126e8..865a815ab92a 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -195,7 +195,7 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
 static void
 nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame,
 			 struct tc_cls_flower_offload *flow,
-			 bool mask_version)
+			 bool mask_version, __be32 *tun_dst)
 {
 	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
 	struct flow_dissector_key_ipv4_addrs *vxlan_ips;
@@ -223,6 +223,7 @@ nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame,
 					     target);
 		frame->ip_src = vxlan_ips->src;
 		frame->ip_dst = vxlan_ips->dst;
+		*tun_dst = vxlan_ips->dst;
 	}
 }
 
@@ -232,6 +233,7 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 				  struct nfp_fl_payload *nfp_flow)
 {
 	enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
+	__be32 tun_dst, tun_dst_mask = 0;
 	struct nfp_repr *netdev_repr;
 	int err;
 	u8 *ext;
@@ -336,10 +338,10 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 	if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) {
 		/* Populate Exact VXLAN Data. */
 		nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext,
-					 flow, false);
+					 flow, false, &tun_dst);
 		/* Populate Mask VXLAN Data. */
 		nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk,
-					 flow, true);
+					 flow, true, &tun_dst_mask);
 		ext += sizeof(struct nfp_flower_vxlan);
 		msk += sizeof(struct nfp_flower_vxlan);
 
@@ -347,6 +349,12 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 		if (nfp_netdev_is_nfp_repr(netdev)) {
 			netdev_repr = netdev_priv(netdev);
 			nfp_tunnel_write_macs(netdev_repr->app);
+
+			/* Store the tunnel destination in the rule data.
+			 * This must be present and be an exact match.
+			 */
+			nfp_flow->nfp_tun_ipv4_addr = tun_dst;
+			nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst);
 		}
 	}
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 637372ba8f55..3d9537ebdea4 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -306,6 +306,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
 	if (!flow_pay->action_data)
 		goto err_free_mask;
 
+	flow_pay->nfp_tun_ipv4_addr = 0;
 	flow_pay->meta.flags = 0;
 	spin_lock_init(&flow_pay->lock);
 
@@ -415,6 +416,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
 	if (err)
 		goto err_free_flow;
 
+	if (nfp_flow->nfp_tun_ipv4_addr)
+		nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr);
+
 	err = nfp_flower_xmit_flow(netdev, nfp_flow,
 				   NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
 	if (err)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 34be85803020..185505140f5e 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -32,6 +32,7 @@
  */
 
 #include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
 #include <linux/idr.h>
 #include <net/dst_metadata.h>
 
@@ -40,6 +41,30 @@
 #include "../nfp_net_repr.h"
 #include "../nfp_net.h"
 
+#define NFP_FL_IPV4_ADDRS_MAX        32
+
+/**
+ * struct nfp_tun_ipv4_addr - set the IP address list on the NFP
+ * @count:	number of IPs populated in the array
+ * @ipv4_addr:	array of IPV4_ADDRS_MAX 32 bit IPv4 addresses
+ */
+struct nfp_tun_ipv4_addr {
+	__be32 count;
+	__be32 ipv4_addr[NFP_FL_IPV4_ADDRS_MAX];
+};
+
+/**
+ * struct nfp_ipv4_addr_entry - cached IPv4 addresses
+ * @ipv4_addr:	IP address
+ * @ref_count:	number of rules currently using this IP
+ * @list:	list pointer
+ */
+struct nfp_ipv4_addr_entry {
+	__be32 ipv4_addr;
+	int ref_count;
+	struct list_head list;
+};
+
 /**
  * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP
  * @reserved:	reserved for future use
@@ -112,6 +137,87 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata)
 	return 0;
 }
 
+static void nfp_tun_write_ipv4_list(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_addr_entry *entry;
+	struct nfp_tun_ipv4_addr payload;
+	struct list_head *ptr, *storage;
+	int count;
+
+	memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr));
+	mutex_lock(&priv->nfp_ipv4_off_lock);
+	count = 0;
+	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+		if (count >= NFP_FL_IPV4_ADDRS_MAX) {
+			mutex_unlock(&priv->nfp_ipv4_off_lock);
+			nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n");
+			return;
+		}
+		entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+		payload.ipv4_addr[count++] = entry->ipv4_addr;
+	}
+	payload.count = cpu_to_be32(count);
+	mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+	nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS,
+				 sizeof(struct nfp_tun_ipv4_addr),
+				 &payload);
+}
+
+void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_addr_entry *entry;
+	struct list_head *ptr, *storage;
+
+	mutex_lock(&priv->nfp_ipv4_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+		if (entry->ipv4_addr == ipv4) {
+			entry->ref_count++;
+			mutex_unlock(&priv->nfp_ipv4_off_lock);
+			return;
+		}
+	}
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		mutex_unlock(&priv->nfp_ipv4_off_lock);
+		nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n");
+		return;
+	}
+	entry->ipv4_addr = ipv4;
+	entry->ref_count = 1;
+	list_add_tail(&entry->list, &priv->nfp_ipv4_off_list);
+	mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+	nfp_tun_write_ipv4_list(app);
+}
+
+void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_addr_entry *entry;
+	struct list_head *ptr, *storage;
+
+	mutex_lock(&priv->nfp_ipv4_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+		if (entry->ipv4_addr == ipv4) {
+			entry->ref_count--;
+			if (!entry->ref_count) {
+				list_del(&entry->list);
+				kfree(entry);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+	nfp_tun_write_ipv4_list(app);
+}
+
 void nfp_tunnel_write_macs(struct nfp_app *app)
 {
 	struct nfp_flower_priv *priv = app->priv;
@@ -324,6 +430,10 @@ int nfp_tunnel_config_start(struct nfp_app *app)
 	INIT_LIST_HEAD(&priv->nfp_mac_index_list);
 	ida_init(&priv->nfp_mac_off_ids);
 
+	/* Initialise priv data for IPv4 offloading. */
+	mutex_init(&priv->nfp_ipv4_off_lock);
+	INIT_LIST_HEAD(&priv->nfp_ipv4_off_list);
+
 	err = register_netdevice_notifier(&priv->nfp_tun_mac_nb);
 	if (err)
 		goto err_free_mac_ida;
@@ -346,6 +456,7 @@ void nfp_tunnel_config_stop(struct nfp_app *app)
 	struct nfp_tun_mac_offload_entry *mac_entry;
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_tun_mac_non_nfp_idx *mac_idx;
+	struct nfp_ipv4_addr_entry *ip_entry;
 	struct list_head *ptr, *storage;
 
 	unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
@@ -371,4 +482,13 @@ void nfp_tunnel_config_stop(struct nfp_app *app)
 	mutex_unlock(&priv->nfp_mac_index_lock);
 
 	ida_destroy(&priv->nfp_mac_off_ids);
+
+	/* Free any memory that may be occupied by ipv4 list. */
+	mutex_lock(&priv->nfp_ipv4_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+		ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+		list_del(&ip_entry->list);
+		kfree(ip_entry);
+	}
+	mutex_unlock(&priv->nfp_ipv4_off_lock);
 }
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 6/7] nfp: flower vxlan neighbour offload
From: Simon Horman @ 2017-09-25 10:23 UTC (permalink / raw)
  To: David Miller, Jakub Kicinski
  Cc: netdev, oss-drivers, John Hurley, Simon Horman
In-Reply-To: <1506335021-32024-1-git-send-email-simon.horman@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Receive a request when the NFP does not know the next hop for a packet
that is to be encapsulated in a VXLAN tunnel. Do a route lookup, determine
the next hop entry and update neighbour table on NFP. Monitor the kernel
neighbour table for link changes and update NFP with relevant information.
Overwrite routes with zero values on the NFP when they expire.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.c   |   6 +
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |   2 +
 drivers/net/ethernet/netronome/nfp/flower/main.h   |   7 +
 .../ethernet/netronome/nfp/flower/tunnel_conf.c    | 253 +++++++++++++++++++++
 4 files changed, 268 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index b756006dba6f..862787daaa68 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -181,6 +181,12 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
 	case NFP_FLOWER_CMSG_TYPE_FLOW_STATS:
 		nfp_flower_rx_flow_stats(app, skb);
 		break;
+	case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
+		nfp_tunnel_request_route(app, skb);
+		break;
+	case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH:
+		/* Acks from the NFP that the route is added - ignore. */
+		break;
 	default:
 		nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
 				     type);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 6540bb1ceefb..1dc72a1ed577 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -317,7 +317,9 @@ enum nfp_flower_cmsg_type_port {
 	NFP_FLOWER_CMSG_TYPE_FLOW_DEL =		2,
 	NFP_FLOWER_CMSG_TYPE_MAC_REPR =		7,
 	NFP_FLOWER_CMSG_TYPE_PORT_MOD =		8,
+	NFP_FLOWER_CMSG_TYPE_NO_NEIGH =		10,
 	NFP_FLOWER_CMSG_TYPE_TUN_MAC =		11,
+	NFP_FLOWER_CMSG_TYPE_TUN_NEIGH =	13,
 	NFP_FLOWER_CMSG_TYPE_TUN_IPS =		14,
 	NFP_FLOWER_CMSG_TYPE_FLOW_STATS =	15,
 	NFP_FLOWER_CMSG_TYPE_PORT_ECHO =	16,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 53306af6cfe8..93ad969c3653 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -87,12 +87,15 @@ struct nfp_fl_stats_id {
  * @nfp_mac_off_list:	List of MAC addresses to offload
  * @nfp_mac_index_list:	List of unique 8-bit indexes for non NFP netdevs
  * @nfp_ipv4_off_list:	List of IPv4 addresses to offload
+ * @nfp_neigh_off_list:	List of neighbour offloads
  * @nfp_mac_off_lock:	Lock for the MAC address list
  * @nfp_mac_index_lock:	Lock for the MAC index list
  * @nfp_ipv4_off_lock:	Lock for the IPv4 address list
+ * @nfp_neigh_off_lock:	Lock for the neighbour address list
  * @nfp_mac_off_ids:	IDA to manage id assignment for offloaded macs
  * @nfp_mac_off_count:	Number of MACs in address list
  * @nfp_tun_mac_nb:	Notifier to monitor link state
+ * @nfp_tun_neigh_nb:	Notifier to monitor neighbour state
  */
 struct nfp_flower_priv {
 	struct nfp_app *app;
@@ -108,12 +111,15 @@ struct nfp_flower_priv {
 	struct list_head nfp_mac_off_list;
 	struct list_head nfp_mac_index_list;
 	struct list_head nfp_ipv4_off_list;
+	struct list_head nfp_neigh_off_list;
 	struct mutex nfp_mac_off_lock;
 	struct mutex nfp_mac_index_lock;
 	struct mutex nfp_ipv4_off_lock;
+	struct mutex nfp_neigh_off_lock;
 	struct ida nfp_mac_off_ids;
 	int nfp_mac_off_count;
 	struct notifier_block nfp_tun_mac_nb;
+	struct notifier_block nfp_tun_neigh_nb;
 };
 
 struct nfp_fl_key_ls {
@@ -189,5 +195,6 @@ void nfp_tunnel_config_stop(struct nfp_app *app);
 void nfp_tunnel_write_macs(struct nfp_app *app);
 void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4);
 void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4);
+void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
 
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 185505140f5e..8c6b88a1306b 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -33,6 +33,7 @@
 
 #include <linux/etherdevice.h>
 #include <linux/inetdevice.h>
+#include <net/netevent.h>
 #include <linux/idr.h>
 #include <net/dst_metadata.h>
 
@@ -41,6 +42,44 @@
 #include "../nfp_net_repr.h"
 #include "../nfp_net.h"
 
+/**
+ * struct nfp_tun_neigh - neighbour/route entry on the NFP
+ * @dst_ipv4:	destination IPv4 address
+ * @src_ipv4:	source IPv4 address
+ * @dst_addr:	destination MAC address
+ * @src_addr:	source MAC address
+ * @port_id:	NFP port to output packet on - associated with source IPv4
+ */
+struct nfp_tun_neigh {
+	__be32 dst_ipv4;
+	__be32 src_ipv4;
+	u8 dst_addr[ETH_ALEN];
+	u8 src_addr[ETH_ALEN];
+	__be32 port_id;
+};
+
+/**
+ * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup
+ * @ingress_port:	ingress port of packet that signalled request
+ * @ipv4_addr:		destination ipv4 address for route
+ * @reserved:		reserved for future use
+ */
+struct nfp_tun_req_route_ipv4 {
+	__be32 ingress_port;
+	__be32 ipv4_addr;
+	__be32 reserved[2];
+};
+
+/**
+ * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP
+ * @ipv4_addr:	destination of route
+ * @list:	list pointer
+ */
+struct nfp_ipv4_route_entry {
+	__be32 ipv4_addr;
+	struct list_head list;
+};
+
 #define NFP_FL_IPV4_ADDRS_MAX        32
 
 /**
@@ -137,6 +176,197 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata)
 	return 0;
 }
 
+static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_route_entry *entry;
+	struct list_head *ptr, *storage;
+
+	mutex_lock(&priv->nfp_neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
+		if (entry->ipv4_addr == ipv4_addr) {
+			mutex_unlock(&priv->nfp_neigh_off_lock);
+			return true;
+		}
+	}
+	mutex_unlock(&priv->nfp_neigh_off_lock);
+	return false;
+}
+
+static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_route_entry *entry;
+	struct list_head *ptr, *storage;
+
+	mutex_lock(&priv->nfp_neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
+		if (entry->ipv4_addr == ipv4_addr) {
+			mutex_unlock(&priv->nfp_neigh_off_lock);
+			return;
+		}
+	}
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		mutex_unlock(&priv->nfp_neigh_off_lock);
+		nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n");
+		return;
+	}
+
+	entry->ipv4_addr = ipv4_addr;
+	list_add_tail(&entry->list, &priv->nfp_neigh_off_list);
+	mutex_unlock(&priv->nfp_neigh_off_lock);
+}
+
+static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_route_entry *entry;
+	struct list_head *ptr, *storage;
+
+	mutex_lock(&priv->nfp_neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
+		if (entry->ipv4_addr == ipv4_addr) {
+			list_del(&entry->list);
+			kfree(entry);
+			break;
+		}
+	}
+	mutex_unlock(&priv->nfp_neigh_off_lock);
+}
+
+static void
+nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
+		    struct flowi4 *flow, struct neighbour *neigh)
+{
+	struct nfp_tun_neigh payload;
+
+	/* Only offload representor IPv4s for now. */
+	if (!nfp_netdev_is_nfp_repr(netdev))
+		return;
+
+	memset(&payload, 0, sizeof(struct nfp_tun_neigh));
+	payload.dst_ipv4 = flow->daddr;
+
+	/* If entry has expired send dst IP with all other fields 0. */
+	if (!(neigh->nud_state & NUD_VALID)) {
+		nfp_tun_del_route_from_cache(app, payload.dst_ipv4);
+		/* Trigger ARP to verify invalid neighbour state. */
+		neigh_event_send(neigh, NULL);
+		goto send_msg;
+	}
+
+	/* Have a valid neighbour so populate rest of entry. */
+	payload.src_ipv4 = flow->saddr;
+	ether_addr_copy(payload.src_addr, netdev->dev_addr);
+	neigh_ha_snapshot(payload.dst_addr, neigh, netdev);
+	payload.port_id = cpu_to_be32(nfp_repr_get_port_id(netdev));
+	/* Add destination of new route to NFP cache. */
+	nfp_tun_add_route_to_cache(app, payload.dst_ipv4);
+
+send_msg:
+	nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH,
+				 sizeof(struct nfp_tun_neigh),
+				 (unsigned char *)&payload);
+}
+
+static int
+nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
+			    void *ptr)
+{
+	struct nfp_flower_priv *app_priv;
+	struct netevent_redirect *redir;
+	struct flowi4 flow = {};
+	struct neighbour *n;
+	struct nfp_app *app;
+	struct rtable *rt;
+	int err;
+
+	switch (event) {
+	case NETEVENT_REDIRECT:
+		redir = (struct netevent_redirect *)ptr;
+		n = redir->neigh;
+		break;
+	case NETEVENT_NEIGH_UPDATE:
+		n = (struct neighbour *)ptr;
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	flow.daddr = *(__be32 *)n->primary_key;
+
+	/* Only concerned with route changes for representors. */
+	if (!nfp_netdev_is_nfp_repr(n->dev))
+		return NOTIFY_DONE;
+
+	app_priv = container_of(nb, struct nfp_flower_priv, nfp_tun_neigh_nb);
+	app = app_priv->app;
+
+	/* Only concerned with changes to routes already added to NFP. */
+	if (!nfp_tun_has_route(app, flow.daddr))
+		return NOTIFY_DONE;
+
+#if IS_ENABLED(CONFIG_INET)
+	/* Do a route lookup to populate flow data. */
+	rt = ip_route_output_key(dev_net(n->dev), &flow);
+	err = PTR_ERR_OR_ZERO(rt);
+	if (err)
+		return NOTIFY_DONE;
+#else
+	return NOTIFY_DONE;
+#endif
+
+	flow.flowi4_proto = IPPROTO_UDP;
+	nfp_tun_write_neigh(n->dev, app, &flow, n);
+
+	return NOTIFY_OK;
+}
+
+void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb)
+{
+	struct nfp_tun_req_route_ipv4 *payload;
+	struct net_device *netdev;
+	struct flowi4 flow = {};
+	struct neighbour *n;
+	struct rtable *rt;
+	int err;
+
+	payload = nfp_flower_cmsg_get_data(skb);
+
+	netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port));
+	if (!netdev)
+		goto route_fail_warning;
+
+	flow.daddr = payload->ipv4_addr;
+	flow.flowi4_proto = IPPROTO_UDP;
+
+#if IS_ENABLED(CONFIG_INET)
+	/* Do a route lookup on same namespace as ingress port. */
+	rt = ip_route_output_key(dev_net(netdev), &flow);
+	err = PTR_ERR_OR_ZERO(rt);
+	if (err)
+		goto route_fail_warning;
+#else
+	goto route_fail_warning;
+#endif
+
+	/* Get the neighbour entry for the lookup */
+	n = dst_neigh_lookup(&rt->dst, &flow.daddr);
+	ip_rt_put(rt);
+	if (!n)
+		goto route_fail_warning;
+	nfp_tun_write_neigh(n->dev, app, &flow, n);
+	neigh_release(n);
+	return;
+
+route_fail_warning:
+	nfp_flower_cmsg_warn(app, "Requested route not found.\n");
+}
+
 static void nfp_tun_write_ipv4_list(struct nfp_app *app)
 {
 	struct nfp_flower_priv *priv = app->priv;
@@ -434,10 +664,19 @@ int nfp_tunnel_config_start(struct nfp_app *app)
 	mutex_init(&priv->nfp_ipv4_off_lock);
 	INIT_LIST_HEAD(&priv->nfp_ipv4_off_list);
 
+	/* Initialise priv data for neighbour offloading. */
+	mutex_init(&priv->nfp_neigh_off_lock);
+	INIT_LIST_HEAD(&priv->nfp_neigh_off_list);
+	priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler;
+
 	err = register_netdevice_notifier(&priv->nfp_tun_mac_nb);
 	if (err)
 		goto err_free_mac_ida;
 
+	err = register_netevent_notifier(&priv->nfp_tun_neigh_nb);
+	if (err)
+		goto err_unreg_mac_nb;
+
 	/* Parse netdevs already registered for MACs that need offloaded. */
 	rtnl_lock();
 	for_each_netdev(&init_net, netdev)
@@ -446,6 +685,8 @@ int nfp_tunnel_config_start(struct nfp_app *app)
 
 	return 0;
 
+err_unreg_mac_nb:
+	unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
 err_free_mac_ida:
 	ida_destroy(&priv->nfp_mac_off_ids);
 	return err;
@@ -455,11 +696,13 @@ void nfp_tunnel_config_stop(struct nfp_app *app)
 {
 	struct nfp_tun_mac_offload_entry *mac_entry;
 	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_route_entry *route_entry;
 	struct nfp_tun_mac_non_nfp_idx *mac_idx;
 	struct nfp_ipv4_addr_entry *ip_entry;
 	struct list_head *ptr, *storage;
 
 	unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
+	unregister_netevent_notifier(&priv->nfp_tun_neigh_nb);
 
 	/* Free any memory that may be occupied by MAC list. */
 	mutex_lock(&priv->nfp_mac_off_lock);
@@ -491,4 +734,14 @@ void nfp_tunnel_config_stop(struct nfp_app *app)
 		kfree(ip_entry);
 	}
 	mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+	/* Free any memory that may be occupied by the route list. */
+	mutex_lock(&priv->nfp_neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+		route_entry = list_entry(ptr, struct nfp_ipv4_route_entry,
+					 list);
+		list_del(&route_entry->list);
+		kfree(route_entry);
+	}
+	mutex_unlock(&priv->nfp_neigh_off_lock);
 }
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 7/7] nfp: flower vxlan neighbour keep-alive
From: Simon Horman @ 2017-09-25 10:23 UTC (permalink / raw)
  To: David Miller, Jakub Kicinski
  Cc: netdev, oss-drivers, John Hurley, Simon Horman
In-Reply-To: <1506335021-32024-1-git-send-email-simon.horman@netronome.com>

From: John Hurley <john.hurley@netronome.com>

Periodically receive messages containing the destination IPs of tunnels
that have recently forwarded traffic. Update the neighbour entries 'used'
value for these IPs next hop.

This prevents the neighbour entry from expiring on timeout but rather
signals an ARP to verify the connection. From an NFP perspective, packets
will not fall back mid-flow unless the link is verified to be down.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.c   |  3 +
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |  1 +
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  1 +
 .../ethernet/netronome/nfp/flower/tunnel_conf.c    | 64 ++++++++++++++++++++++
 4 files changed, 69 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index 862787daaa68..6b71c719deba 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -184,6 +184,9 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
 	case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
 		nfp_tunnel_request_route(app, skb);
 		break;
+	case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS:
+		nfp_tunnel_keep_alive(app, skb);
+		break;
 	case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH:
 		/* Acks from the NFP that the route is added - ignore. */
 		break;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 1dc72a1ed577..504ddaa21701 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -319,6 +319,7 @@ enum nfp_flower_cmsg_type_port {
 	NFP_FLOWER_CMSG_TYPE_PORT_MOD =		8,
 	NFP_FLOWER_CMSG_TYPE_NO_NEIGH =		10,
 	NFP_FLOWER_CMSG_TYPE_TUN_MAC =		11,
+	NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS =	12,
 	NFP_FLOWER_CMSG_TYPE_TUN_NEIGH =	13,
 	NFP_FLOWER_CMSG_TYPE_TUN_IPS =		14,
 	NFP_FLOWER_CMSG_TYPE_FLOW_STATS =	15,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 93ad969c3653..12c319a219d8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -196,5 +196,6 @@ void nfp_tunnel_write_macs(struct nfp_app *app);
 void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4);
 void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4);
 void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
+void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb);
 
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 8c6b88a1306b..c495f8f38506 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -36,12 +36,36 @@
 #include <net/netevent.h>
 #include <linux/idr.h>
 #include <net/dst_metadata.h>
+#include <net/arp.h>
 
 #include "cmsg.h"
 #include "main.h"
 #include "../nfp_net_repr.h"
 #include "../nfp_net.h"
 
+#define NFP_FL_MAX_ROUTES               32
+
+/**
+ * struct nfp_tun_active_tuns - periodic message of active tunnels
+ * @seq:		sequence number of the message
+ * @count:		number of tunnels report in message
+ * @flags:		options part of the request
+ * @ipv4:		dest IPv4 address of active route
+ * @egress_port:	port the encapsulated packet egressed
+ * @extra:		reserved for future use
+ * @tun_info:		tunnels that have sent traffic in reported period
+ */
+struct nfp_tun_active_tuns {
+	__be32 seq;
+	__be32 count;
+	__be32 flags;
+	struct route_ip_info {
+		__be32 ipv4;
+		__be32 egress_port;
+		__be32 extra[2];
+	} tun_info[];
+};
+
 /**
  * struct nfp_tun_neigh - neighbour/route entry on the NFP
  * @dst_ipv4:	destination IPv4 address
@@ -147,6 +171,46 @@ struct nfp_tun_mac_non_nfp_idx {
 	struct list_head list;
 };
 
+void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb)
+{
+	struct nfp_tun_active_tuns *payload;
+	struct net_device *netdev;
+	int count, i, pay_len;
+	struct neighbour *n;
+	__be32 ipv4_addr;
+	u32 port;
+
+	payload = nfp_flower_cmsg_get_data(skb);
+	count = be32_to_cpu(payload->count);
+	if (count > NFP_FL_MAX_ROUTES) {
+		nfp_flower_cmsg_warn(app, "Tunnel keep-alive request exceeds max routes.\n");
+		return;
+	}
+
+	pay_len = nfp_flower_cmsg_get_data_len(skb);
+	if (pay_len != sizeof(struct nfp_tun_active_tuns) +
+	    sizeof(struct route_ip_info) * count) {
+		nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n");
+		return;
+	}
+
+	for (i = 0; i < count; i++) {
+		ipv4_addr = payload->tun_info[i].ipv4;
+		port = be32_to_cpu(payload->tun_info[i].egress_port);
+		netdev = nfp_app_repr_get(app, port);
+		if (!netdev)
+			continue;
+
+		n = neigh_lookup(&arp_tbl, &ipv4_addr, netdev);
+		if (!n)
+			continue;
+
+		/* Update the used timestamp of neighbour */
+		neigh_event_send(n, NULL);
+		neigh_release(n);
+	}
+}
+
 static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev)
 {
 	if (!netdev->rtnl_link_ops)
-- 
2.1.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox