Netdev List
 help / color / mirror / Atom feed
* [patch net-next 6/8] mlxsw: spectrum_dpipe: Add support for IPv6 host table dump
From: Jiri Pirko @ 2017-08-30 12:03 UTC (permalink / raw)
  To: netdev; +Cc: davem, arkadis, idosch, mlxsw
In-Reply-To: <20170830120306.6128-1-jiri@resnulli.us>

From: Arkadi Sharshevsky <arkadis@mellanox.com>

Add support for IPv6 host table dump.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_dpipe.c   | 75 ++++++++++++++++++++--
 1 file changed, 70 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 5924e97..75da2ef 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -386,8 +386,19 @@ mlxsw_sp_dpipe_table_host_match_action_prepare(struct devlink_dpipe_match *match
 
 	match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
 	match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
-	match->header = &devlink_dpipe_header_ipv4;
-	match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP;
+	switch (type) {
+	case AF_INET:
+		match->header = &devlink_dpipe_header_ipv4;
+		match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP;
+		break;
+	case AF_INET6:
+		match->header = &devlink_dpipe_header_ipv6;
+		match->field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP;
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
 
 	action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
 	action->header = &devlink_dpipe_header_ethernet;
@@ -424,7 +435,18 @@ mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry,
 	match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
 
 	match_value->match = match;
-	match_value->value_size = sizeof(u32);
+	switch (type) {
+	case AF_INET:
+		match_value->value_size = sizeof(u32);
+		break;
+	case AF_INET6:
+		match_value->value_size = sizeof(struct in6_addr);
+		break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
 	match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
 	if (!match_value->value)
 		return -ENOMEM;
@@ -479,6 +501,20 @@ mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry,
 }
 
 static void
+mlxsw_sp_dpipe_table_host6_entry_fill(struct devlink_dpipe_entry *entry,
+				      struct mlxsw_sp_neigh_entry *neigh_entry,
+				      struct mlxsw_sp_rif *rif)
+{
+	struct in6_addr *dip;
+	unsigned char *ha;
+
+	ha = mlxsw_sp_neigh_entry_ha(neigh_entry);
+	dip = mlxsw_sp_neigh6_entry_dip(neigh_entry);
+
+	__mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, dip);
+}
+
+static void
 mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp,
 				     struct devlink_dpipe_entry *entry,
 				     struct mlxsw_sp_neigh_entry *neigh_entry,
@@ -487,7 +523,18 @@ mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp,
 {
 	int err;
 
-	mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif);
+	switch (type) {
+	case AF_INET:
+		mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif);
+		break;
+	case AF_INET6:
+		mlxsw_sp_dpipe_table_host6_entry_fill(entry, neigh_entry, rif);
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
 	err = mlxsw_sp_neigh_counter_get(mlxsw_sp, neigh_entry,
 					 &entry->counter);
 	if (!err)
@@ -526,7 +573,13 @@ mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp,
 
 		rif_neigh_count = 0;
 		mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) {
-			if (mlxsw_sp_neigh_entry_type(neigh_entry) != type)
+			int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry);
+
+			if (neigh_type != type)
+				continue;
+
+			if (neigh_type == AF_INET6 &&
+			    mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
 				continue;
 
 			if (rif_neigh_count < rif_neigh_skip)
@@ -714,6 +767,17 @@ mlxsw_sp_dpipe_table_host6_matches_dump(void *priv, struct sk_buff *skb)
 	return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET6);
 }
 
+static int
+mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled,
+					struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp,
+						      counters_enabled,
+						      dump_ctx, AF_INET6);
+}
+
 static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv)
 {
 	struct mlxsw_sp *mlxsw_sp = priv;
@@ -724,6 +788,7 @@ static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv)
 static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = {
 	.matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump,
 	.actions_dump = mlxsw_sp_dpipe_table_host_actions_dump,
+	.entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump,
 	.size_get = mlxsw_sp_dpipe_table_host6_size_get,
 };
 
-- 
2.9.3

^ permalink raw reply related

* [patch net-next 7/8] mlxsw: spectrum_router: Add support for setting counters on IPv6 neighbors
From: Jiri Pirko @ 2017-08-30 12:03 UTC (permalink / raw)
  To: netdev; +Cc: davem, arkadis, idosch, mlxsw
In-Reply-To: <20170830120306.6128-1-jiri@resnulli.us>

From: Arkadi Sharshevsky <arkadis@mellanox.com>

Add support for setting counters on IPv6 neighbors based on dpipe's host6
table counter status.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index db57c0c..0cf6810 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1008,21 +1008,33 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
 }
 
 static bool
-mlxsw_sp_neigh4_counter_should_alloc(struct mlxsw_sp *mlxsw_sp)
+mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_neigh_entry *neigh_entry)
 {
 	struct devlink *devlink;
+	const char *table_name;
+
+	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
+	case AF_INET:
+		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
+		break;
+	case AF_INET6:
+		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
+		break;
+	default:
+		WARN_ON(1);
+		return false;
+	}
 
 	devlink = priv_to_devlink(mlxsw_sp->core);
-	return devlink_dpipe_table_counter_enabled(devlink,
-						   MLXSW_SP_DPIPE_TABLE_NAME_HOST4);
+	return devlink_dpipe_table_counter_enabled(devlink, table_name);
 }
 
 static void
 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
 			     struct mlxsw_sp_neigh_entry *neigh_entry)
 {
-	if (mlxsw_sp_neigh_entry_type(neigh_entry) != AF_INET ||
-	    !mlxsw_sp_neigh4_counter_should_alloc(mlxsw_sp))
+	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
 		return;
 
 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
-- 
2.9.3

^ permalink raw reply related

* [patch net-next 8/8] mlxsw: spectrum_dpipe: Add support for controlling IPv6 neighbor counters
From: Jiri Pirko @ 2017-08-30 12:03 UTC (permalink / raw)
  To: netdev; +Cc: davem, arkadis, idosch, mlxsw
In-Reply-To: <20170830120306.6128-1-jiri@resnulli.us>

From: Arkadi Sharshevsky <arkadis@mellanox.com>

Add support for controlling IPv6 neighbor counters via dpipe.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 75da2ef..51e6846 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -679,8 +679,15 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp,
 		if (!rif)
 			continue;
 		mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) {
-			if (mlxsw_sp_neigh_entry_type(neigh_entry) != type)
+			int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry);
+
+			if (neigh_type != type)
+				continue;
+
+			if (neigh_type == AF_INET6 &&
+			    mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
 				continue;
+
 			mlxsw_sp_neigh_entry_counter_update(mlxsw_sp,
 							    neigh_entry,
 							    enable);
@@ -778,6 +785,14 @@ mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled,
 						      dump_ctx, AF_INET6);
 }
 
+static int mlxsw_sp_dpipe_table_host6_counters_update(void *priv, bool enable)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET6);
+	return 0;
+}
+
 static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv)
 {
 	struct mlxsw_sp *mlxsw_sp = priv;
@@ -789,6 +804,7 @@ static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = {
 	.matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump,
 	.actions_dump = mlxsw_sp_dpipe_table_host_actions_dump,
 	.entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump,
+	.counters_set_update = mlxsw_sp_dpipe_table_host6_counters_update,
 	.size_get = mlxsw_sp_dpipe_table_host6_size_get,
 };
 
-- 
2.9.3

^ permalink raw reply related

* Re: [patch net-next v2 2/3] net/sched: Change cls_flower to use IDR
From: Jamal Hadi Salim @ 2017-08-30 12:09 UTC (permalink / raw)
  To: Chris Mi, netdev; +Cc: xiyou.wangcong, jiri, davem, mawilcox
In-Reply-To: <1504074719-15147-3-git-send-email-chrism@mellanox.com>

On 17-08-30 02:31 AM, Chris Mi wrote:
> Currently, all filters with the same priority are linked in a doubly
> linked list. Every filter should have a unique handle. To make the
> handle unique, we need to iterate the list every time to see if the
> handle exists or not when inserting a new filter. It is time-consuming.
> For example, it takes about 5m3.169s to insert 64K rules.
> 
> This patch changes cls_flower to use IDR. With this patch, it
> takes about 0m1.127s to insert 64K rules. The improvement is huge.
> 
> But please note that in this testing, all filters share the same action.
> If every filter has a unique action, that is another bottleneck.
> Follow-up patch in this patchset addresses that.
> 
> Signed-off-by: Chris Mi <chrism@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>


BTW: I'd already acked this before but you left it out in this
version.

cheers,
jamal

^ permalink raw reply

* Re: [patch net-next v2 3/3] net/sched: Change act_api and act_xxx modules to use IDR
From: Jamal Hadi Salim @ 2017-08-30 12:11 UTC (permalink / raw)
  To: Chris Mi, netdev; +Cc: xiyou.wangcong, jiri, davem, mawilcox
In-Reply-To: <1504074719-15147-4-git-send-email-chrism@mellanox.com>

On 17-08-30 02:31 AM, Chris Mi wrote:
> Typically, each TC filter has its own action. All the actions of the
> same type are saved in its hash table. But the hash buckets are too
> small that it degrades to a list. And the performance is greatly
> affected. For example, it takes about 0m11.914s to insert 64K rules.
> If we convert the hash table to IDR, it only takes about 0m1.500s.
> The improvement is huge.
> 
> But please note that the test result is based on previous patch that
> cls_flower uses IDR.
> 
> Signed-off-by: Chris Mi <chrism@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>

Also already acked this before but you left it out in this
version. If you make changes to the patch then you will need
a new ACK.

Don't forget to update selftests please.

cheers,
jamal

^ permalink raw reply

* Re: [PATCH v2 2/3] dt-binding: net: sfp binding documentation
From: Sergei Shtylyov @ 2017-08-30 12:13 UTC (permalink / raw)
  To: Baruch Siach
  Cc: Rob Herring, Mark Rutland, Andrew Lunn, Florian Fainelli,
	David S. Miller, Russell King, netdev, devicetree
In-Reply-To: <20170830112557.dhcl3pv7gmuzg22v@tarshish>

On 08/30/2017 02:25 PM, Baruch Siach wrote:

>>>>> Add device-tree binding documentation SFP transceivers. Support for SFP
>>>>> transceivers has been recently introduced (drivers/net/phy/sfp.c).
>>>>>
>>>>> Signed-off-by: Baruch Siach <baruch@tkos.co.il>
>>>>> ---
>>>>> v2:
>>>>>      Rename -gpio properties to -gpios
>>>>>      Rename the rate-select-gpio property to rate-select0-gpios
>>>>>      Add the rate-select1-gpios property
>>>>>      Add examples
>>>>> ---
>>>>>     Documentation/devicetree/bindings/net/sff,sfp.txt | 74 +++++++++++++++++++++++
>>>>>     1 file changed, 74 insertions(+)
>>>>>     create mode 100644 Documentation/devicetree/bindings/net/sff,sfp.txt
>>>>>
>>>>> diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
>>>>> new file mode 100644
>>>>> index 000000000000..1d9c786d6287
>>>>> --- /dev/null
>>>>> +++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
>>>>> @@ -0,0 +1,74 @@
>>>>> +Small Form Factor (SFF) Committee Small Form-factor Pluggable (SFP)
>>>>> +Transceiver
>>>>> +
>>>>> +Required properties:
>>>>> +
>>>>> +- compatible : must be "sff,sfp"
>>>>> +
>>>>> +Optional Properties:
>>>>> +
>>>>> +- i2c-bus : phandle of an I2C bus controller for the SFP two wire serial
>>>>> +  interface
>>>>> +
>>>>> +- moddef0-gpios : phandle of the MOD-DEF0 (AKA Mod_ABS) module presence input
>>>>> +  gpio signal
>>>>
>>>>      Your example shows there's GPIO phandle *and* specifier.
>>>
>>> Would "GPIO specifier" be enough here?
>>
>>     No, specifier is the cells following GPIO (or any other) phandle.
> 
> So this should be "GPIO phandle and specifier of ...", is that correct?

    Exactly. The length of the GPIO specifier is determined by the 
"#gpio-cells" prop of the GPIO node your phandle refers to.

> I have found very few (< 4) occurrences of this language in (lots of) '-gpios'
> property descriptions under Documentation/devicetree/bindings/.

    You'd better see Documentation/devicetree/bindings/gpio/gpio.txt.

> Is this a new
> requirement?

    Not at all, and it's not limited to GPIOs. Nobody reads the specs. :-)

> baruch

MBR, Sergei

^ permalink raw reply

* Re: [PATCH net 0/9] net/sched: init failure fixes
From: Jamal Hadi Salim @ 2017-08-30 12:15 UTC (permalink / raw)
  To: Nikolay Aleksandrov, netdev
  Cc: edumazet, xiyou.wangcong, jiri, roopa, Lucas Bates
In-Reply-To: <1504086545-7777-1-git-send-email-nikolay@cumulusnetworks.com>

On 17-08-30 05:48 AM, Nikolay Aleksandrov wrote:
> Hi all,
> I went over all qdiscs' init, destroy and reset callbacks and found the
> issues fixed in each patch. Mostly they are null pointer dereferences due
> to uninitialized timer (qdisc watchdog) or double frees due to ->destroy
> cleaning up a second time. There's more information in each patch.
> I've tested these by either sending wrong attributes from user-spaces, no
> attributes or by simulating memory alloc failure where applicable. Also
> tried all of the qdiscs as a default qdisc.
> 
> Most of these bugs were present before commit 87b60cfacf9f, I've tried to
> include proper fixes tags in each patch.
> 
> I haven't included individual patch acks in the set, I'd appreciate it if
> you take another look and resend them.
> 


Hi Nik,

For all patches:

Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>

Would you please consider adding all the tests
you used to create the oopses in selftests? It will ensure these
embarrassing bugs get caught should they ever happen again.
If you need help ping Lucas on Cc.

cheers,
jamal

^ permalink raw reply

* Re: Question about ip_defrag
From: Jesper Dangaard Brouer @ 2017-08-30 12:22 UTC (permalink / raw)
  To: Florian Westphal
  Cc: liujian (CE), davem@davemloft.net, edumazet@google.com,
	netdev@vger.kernel.org, Wangkefeng (Kevin), weiyongjun (A),
	brouer
In-Reply-To: <20170830115820.GC9993@breakpoint.cc>

On Wed, 30 Aug 2017 13:58:20 +0200
Florian Westphal <fw@strlen.de> wrote:

> Jesper Dangaard Brouer <brouer@redhat.com> wrote:
> > > I take 2) back.  Its wrong to do this, for large NR_CPU values it
> > > would even overflow.  
> > 
> > Alternatively solution 3:
> > Why do we want to maintain a (4MBytes) memory limit, across all CPUs?
> > Couldn't we just allow each CPU to have a memory limit?  
> 
> Consider ipv4, ipv6, nf ipv6 defrag, 6lowpan, and 8k cpus... This will
> render any limit useless.

With 8K CPUs, I agree that this might be a bad idea!

> > > > To me it looks like we/I have been using the wrong API for comparing
> > > > against percpu_counters.  I guess we should have used __percpu_counter_compare().    
> > > 
> > > Are you sure?  For liujian use case (64 cores) it looks like we would
> > > always fall through to percpu_counter_sum() so we eat spinlock_irqsave
> > > cost for all compares.
> > > 
> > > Before we entertain this we should consider reducing frag_percpu_counter_batch
> > > to a smaller value.  
> > 
> > Yes, I agree, we really need to lower/reduce the frag_percpu_counter_batch.
> > As you say, else the __percpu_counter_compare() call will be useless
> > (around systems with >= 32 CPUs).
> > 
> > I think the bug is in frag_mem_limit().  It just reads the global
> > counter (fbc->count), without considering other CPUs can have upto 130K
> > that haven't been subtracted yet (due to 3M low limit, become dangerous
> > at >=24 CPUs).  The  __percpu_counter_compare() does the right thing,
> > and takes into account the number of (online) CPUs and batch size, to
> > account for this.  
> 
> Right, I think we should at very least use __percpu_counter_compare
> before denying a new frag queue allocation request.
> 
> I'll create a patch.

Oh, I've already started working on a patch, that I'm testing now.  But
if you want to take the assignment then I'm fine with that!  I just
thought that it was my responsibility to fix, given I introduced
percpu_counter usage (back in 2013-01-28 / 6d7b857d541e).

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* [PATCH net-next v7] openvswitch: enable NSH support
From: Yi Yang @ 2017-08-30 12:39 UTC (permalink / raw)
  To: netdev; +Cc: davem, dev, jbenc, e, blp, jan.scheurich, Yi Yang

v6->v7
 - Remove the NSH GSO patches from v6 because Jiri Benc
   reworked them as another patch series and they have
   been merged.
 - Adapt to the nsh kernel module added by the NSH
   GSO patch series.

v5->v6
 - Fix the remaining comments for v4.
 - Add NSH GSO support for VxLAN-gpe + NSH and
   Eth + NSH.

v4->v5
 - Fix many comments by Jiri Benc and Eric Garver
   for v4.

v3->v4
 - Add new NSH match field ttl
 - Update NSH header to the latest format
   which will be final format and won't change
   per its author's confirmation.
 - Fix comments for v3.

v2->v3
 - Change OVS_KEY_ATTR_NSH to nested key to handle
   length-fixed attributes and length-variable
   attribute more flexibly.
 - Remove struct ovs_action_push_nsh completely
 - Add code to handle nested attribute for SET_MASKED
 - Change PUSH_NSH to use the nested OVS_KEY_ATTR_NSH
   to transfer NSH header data.
 - Fix comments and coding style issues by Jiri and Eric

v1->v2
 - Change encap_nsh and decap_nsh to push_nsh and pop_nsh
 - Dynamically allocate struct ovs_action_push_nsh for
   length-variable metadata.

The OVS master and 2.8 branches have merged the NSH userspace
patch series. This patch enables NSH support in the kernel
data path so that OVS can support NSH in compat mode by
porting this.

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
---
 include/net/nsh.h                |   3 +
 include/uapi/linux/openvswitch.h |  28 +++
 net/nsh/nsh.c                    |  41 ++++
 net/openvswitch/actions.c        | 141 ++++++++++++++
 net/openvswitch/flow.c           |  55 ++++++
 net/openvswitch/flow.h           |  11 ++
 net/openvswitch/flow_netlink.c   | 406 ++++++++++++++++++++++++++++++++++++++-
 net/openvswitch/flow_netlink.h   |   4 +
 8 files changed, 688 insertions(+), 1 deletion(-)

diff --git a/include/net/nsh.h b/include/net/nsh.h
index a1eaea2..6c0cd57 100644
--- a/include/net/nsh.h
+++ b/include/net/nsh.h
@@ -304,4 +304,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags,
 			NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
 }
 
+int skb_push_nsh(struct sk_buff *skb, const struct nshhdr *nsh_src,
+		 bool is_eth);
+
 #endif /* __NET_NSH_H */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 156ee4c..91dee5b 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -333,6 +333,7 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_CT_LABELS,	/* 16-octet connection tracking label */
 	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
 	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
+	OVS_KEY_ATTR_NSH,       /* Nested set of ovs_nsh_key_* */
 
 #ifdef __KERNEL__
 	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
@@ -491,6 +492,29 @@ struct ovs_key_ct_tuple_ipv6 {
 	__u8   ipv6_proto;
 };
 
+enum ovs_nsh_key_attr {
+	OVS_NSH_KEY_ATTR_BASE,  /* struct ovs_nsh_key_base. */
+	OVS_NSH_KEY_ATTR_MD1,   /* struct ovs_nsh_key_md1. */
+	OVS_NSH_KEY_ATTR_MD2,   /* variable-length octets for MD type 2. */
+	__OVS_NSH_KEY_ATTR_MAX
+};
+
+#define OVS_NSH_KEY_ATTR_MAX (__OVS_NSH_KEY_ATTR_MAX - 1)
+
+struct ovs_nsh_key_base {
+	__u8 flags;
+	__u8 ttl;
+	__u8 mdtype;
+	__u8 np;
+	__be32 path_hdr;
+};
+
+#define NSH_MD1_CONTEXT_SIZE 4
+
+struct ovs_nsh_key_md1 {
+	__be32 context[NSH_MD1_CONTEXT_SIZE];
+};
+
 /**
  * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
  * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
@@ -806,6 +830,8 @@ struct ovs_action_push_eth {
  * packet.
  * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
  * packet.
+ * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet.
+ * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -835,6 +861,8 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_TRUNC,        /* u32 struct ovs_action_trunc. */
 	OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
 	OVS_ACTION_ATTR_POP_ETH,      /* No argument. */
+	OVS_ACTION_ATTR_PUSH_NSH,     /* Nested OVS_NSH_KEY_ATTR_*. */
+	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
 
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 58fb827..ad689b5 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -14,6 +14,47 @@
 #include <net/nsh.h>
 #include <net/tun_proto.h>
 
+int skb_push_nsh(struct sk_buff *skb, const struct nshhdr *nsh_src, bool is_eth)
+{
+	struct nshhdr *nsh;
+	size_t length = nsh_hdr_len(nsh_src);
+	u8 next_proto;
+
+	if (is_eth) {
+		next_proto = TUN_P_ETHERNET;
+	} else {
+		next_proto = tun_p_from_eth_p(skb->protocol);
+		if (!next_proto)
+			return -ENOTSUPP;
+	}
+
+	/* Add the NSH header */
+	if (skb_cow_head(skb, length) < 0)
+		return -ENOMEM;
+
+	/* Add the NSH header */
+	if (skb_cow_head(skb, length) < 0)
+		return -ENOMEM;
+
+	if (!skb->inner_protocol) {
+		skb_set_inner_network_header(skb, skb->mac_len);
+		skb_set_inner_protocol(skb, skb->protocol);
+	}
+
+	skb_push(skb, length);
+	nsh = (struct nshhdr *)(skb->data);
+	memcpy(nsh, nsh_src, length);
+	nsh->np = next_proto;
+	nsh->mdtype &= NSH_MDTYPE_MASK;
+
+	skb->protocol = htons(ETH_P_NSH);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(skb_push_nsh);
+
 static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
 				       netdev_features_t features)
 {
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index a54a556..e969fad 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -38,11 +38,13 @@
 #include <net/dsfield.h>
 #include <net/mpls.h>
 #include <net/sctp/checksum.h>
+#include <net/tun_proto.h>
 
 #include "datapath.h"
 #include "flow.h"
 #include "conntrack.h"
 #include "vport.h"
+#include "flow_netlink.h"
 
 struct deferred_action {
 	struct sk_buff *skb;
@@ -380,6 +382,57 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
 	return 0;
 }
 
+static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
+		    const struct nshhdr *nsh_src)
+{
+	bool is_eth = false;
+	int ret;
+
+	if (key->mac_proto == MAC_PROTO_ETHERNET)
+		is_eth = true;
+
+	ret = skb_push_nsh(skb, nsh_src, is_eth);
+	if (ret != 0)
+		return ret;
+
+	key->eth.type = htons(ETH_P_NSH);
+
+	/* safe right before invalidate_flow_key */
+	key->mac_proto = MAC_PROTO_NONE;
+	invalidate_flow_key(key);
+	return 0;
+}
+
+static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct nshhdr *nsh = (struct nshhdr *)(skb->data);
+	size_t length;
+	u16 inner_proto;
+
+	if (ovs_key_mac_proto(key) != MAC_PROTO_NONE ||
+	    skb->protocol != htons(ETH_P_NSH)) {
+		return -EINVAL;
+	}
+
+	inner_proto = tun_p_to_eth_p(nsh->np);
+	if (!inner_proto)
+		return -ENOTSUPP;
+
+	length = nsh_hdr_len(nsh);
+	skb_pull(skb, length);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+	skb->protocol = inner_proto;
+
+	/* safe right before invalidate_flow_key */
+	if (inner_proto == htons(ETH_P_TEB))
+		key->mac_proto = MAC_PROTO_ETHERNET;
+	else
+		key->mac_proto = MAC_PROTO_NONE;
+	invalidate_flow_key(key);
+	return 0;
+}
+
 static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
 				  __be32 addr, __be32 new_addr)
 {
@@ -602,6 +655,53 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
 	return 0;
 }
 
+static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		   const struct ovs_key_nsh *key,
+		   const struct ovs_key_nsh *mask)
+{
+	struct nshhdr *nsh;
+	int err;
+	u8 flags;
+	u8 ttl;
+	int i;
+
+	err = skb_ensure_writable(skb, skb_network_offset(skb) +
+				  sizeof(struct nshhdr));
+	if (unlikely(err))
+		return err;
+
+	nsh = (struct nshhdr *)skb_network_header(skb);
+
+	flags = nsh_get_flags(nsh);
+	flags = OVS_MASKED(flags, key->flags, mask->flags);
+	flow_key->nsh.flags = flags;
+	ttl = nsh_get_ttl(nsh);
+	ttl = OVS_MASKED(ttl, key->ttl, mask->ttl);
+	flow_key->nsh.ttl = ttl;
+	nsh_set_flags_and_ttl(nsh, flags, ttl);
+	nsh->path_hdr = OVS_MASKED(nsh->path_hdr, key->path_hdr,
+				   mask->path_hdr);
+	flow_key->nsh.path_hdr = nsh->path_hdr;
+	switch (nsh->mdtype) {
+	case NSH_M_TYPE1:
+		for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
+			nsh->md1.context[i] =
+			    OVS_MASKED(nsh->md1.context[i], key->context[i],
+				       mask->context[i]);
+		}
+		memcpy(flow_key->nsh.context, nsh->md1.context,
+		       sizeof(nsh->md1.context));
+		break;
+	case NSH_M_TYPE2:
+		memset(flow_key->nsh.context, 0,
+		       sizeof(flow_key->nsh.context));
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /* Must follow skb_ensure_writable() since that can move the skb data. */
 static void set_tp_port(struct sk_buff *skb, __be16 *port,
 			__be16 new_port, __sum16 *check)
@@ -1024,6 +1124,32 @@ static int execute_masked_set_action(struct sk_buff *skb,
 				   get_mask(a, struct ovs_key_ethernet *));
 		break;
 
+	case OVS_KEY_ATTR_NSH: {
+		struct ovs_key_nsh nsh;
+		struct ovs_key_nsh nsh_mask;
+		size_t size = nla_len(a) / 2;
+		struct {
+			struct nlattr nla;
+			u8 data[size];
+		} attr, mask;
+
+		attr.nla.nla_type = nla_type(a);
+		attr.nla.nla_len = NLA_HDRLEN + size;
+		memcpy(attr.data, (char *)(a + 1), size);
+
+		mask.nla = attr.nla;
+		memcpy(mask.data, (char *)(a + 1) + size, size);
+
+		err = nsh_key_from_nlattr(&attr.nla, &nsh);
+		if (err)
+			break;
+		err = nsh_key_from_nlattr(&mask.nla, &nsh_mask);
+		if (err)
+			break;
+		err = set_nsh(skb, flow_key, &nsh, &nsh_mask);
+		break;
+	}
+
 	case OVS_KEY_ATTR_IPV4:
 		err = set_ipv4(skb, flow_key, nla_data(a),
 			       get_mask(a, struct ovs_key_ipv4 *));
@@ -1210,6 +1336,21 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 		case OVS_ACTION_ATTR_POP_ETH:
 			err = pop_eth(skb, key);
 			break;
+
+		case OVS_ACTION_ATTR_PUSH_NSH: {
+			u8 buffer[NSH_HDR_MAX_LEN];
+			struct nshhdr *nsh_hdr = (struct nshhdr *)buffer;
+			const struct nshhdr *nsh_src = nsh_hdr;
+
+			nsh_hdr_from_nlattr(nla_data(a), nsh_hdr,
+					    NSH_HDR_MAX_LEN);
+			err = push_nsh(skb, key, nsh_src);
+			break;
+		}
+
+		case OVS_ACTION_ATTR_POP_NSH:
+			err = pop_nsh(skb, key);
+			break;
 		}
 
 		if (unlikely(err)) {
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8c94cef..7a178d1 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -46,6 +46,7 @@
 #include <net/ipv6.h>
 #include <net/mpls.h>
 #include <net/ndisc.h>
+#include <net/nsh.h>
 
 #include "conntrack.h"
 #include "datapath.h"
@@ -490,6 +491,56 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 	return 0;
 }
 
+static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct nshhdr *nsh;
+	unsigned int nh_ofs = skb_network_offset(skb);
+	u8 version, length;
+	int err;
+
+	err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
+	if (unlikely(err))
+		return err;
+
+	nsh = (struct nshhdr *)skb_network_header(skb);
+	version = nsh_get_ver(nsh);
+	length = nsh_hdr_len(nsh);
+
+	if (version != 0)
+		return -EINVAL;
+
+	err = check_header(skb, nh_ofs + length);
+	if (unlikely(err))
+		return err;
+
+	nsh = (struct nshhdr *)skb_network_header(skb);
+	key->nsh.flags = nsh_get_flags(nsh);
+	key->nsh.ttl = nsh_get_ttl(nsh);
+	key->nsh.mdtype = nsh->mdtype;
+	key->nsh.np = nsh->np;
+	key->nsh.path_hdr = nsh->path_hdr;
+	switch (key->nsh.mdtype) {
+	case NSH_M_TYPE1:
+		if (length != NSH_M_TYPE1_LEN)
+			return -EINVAL;
+		memcpy(key->nsh.context, nsh->md1.context,
+		       sizeof(nsh->md1));
+		break;
+	case NSH_M_TYPE2:
+		/* Don't support MD type 2 metadata parsing yet */
+		if (length < NSH_BASE_HDR_LEN)
+			return -EINVAL;
+
+		memset(key->nsh.context, 0,
+		       sizeof(nsh->md1));
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * key_extract - extracts a flow key from an Ethernet frame.
  * @skb: sk_buff that contains the frame, with skb->data pointing to the
@@ -735,6 +786,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		}
+	} else if (key->eth.type == htons(ETH_P_NSH)) {
+		error = parse_nsh(skb, key);
+		if (error)
+			return error;
 	}
 	return 0;
 }
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 1875bba..6a3cd9c 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -35,6 +35,7 @@
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
 #include <net/dst_metadata.h>
+#include <net/nsh.h>
 
 struct sk_buff;
 
@@ -66,6 +67,15 @@ struct vlan_head {
 	(offsetof(struct sw_flow_key, recirc_id) +	\
 	FIELD_SIZEOF(struct sw_flow_key, recirc_id))
 
+struct ovs_key_nsh {
+	u8 flags;
+	u8 ttl;
+	u8 mdtype;
+	u8 np;
+	__be32 path_hdr;
+	__be32 context[NSH_MD1_CONTEXT_SIZE];
+};
+
 struct sw_flow_key {
 	u8 tun_opts[IP_TUNNEL_OPTS_MAX];
 	u8 tun_opts_len;
@@ -144,6 +154,7 @@ struct sw_flow_key {
 			};
 		} ipv6;
 	};
+	struct ovs_key_nsh nsh;         /* network service header */
 	struct {
 		/* Connection tracking fields not packed above. */
 		struct {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index e8eb427..17df00a 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@
 #include <net/ndisc.h>
 #include <net/mpls.h>
 #include <net/vxlan.h>
+#include <net/tun_proto.h>
 
 #include "flow_netlink.h"
 
@@ -78,9 +79,11 @@ static bool actions_may_change_flow(const struct nlattr *actions)
 		case OVS_ACTION_ATTR_HASH:
 		case OVS_ACTION_ATTR_POP_ETH:
 		case OVS_ACTION_ATTR_POP_MPLS:
+		case OVS_ACTION_ATTR_POP_NSH:
 		case OVS_ACTION_ATTR_POP_VLAN:
 		case OVS_ACTION_ATTR_PUSH_ETH:
 		case OVS_ACTION_ATTR_PUSH_MPLS:
+		case OVS_ACTION_ATTR_PUSH_NSH:
 		case OVS_ACTION_ATTR_PUSH_VLAN:
 		case OVS_ACTION_ATTR_SAMPLE:
 		case OVS_ACTION_ATTR_SET:
@@ -322,12 +325,27 @@ size_t ovs_tun_key_attr_size(void)
 		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
 }
 
+size_t ovs_nsh_key_attr_size(void)
+{
+	/* Worst-case netlink size of the nested NSH key attributes.
+	 * Revisit this whenever a new OVS_NSH_KEY_ATTR_* is added.
+	 *
+	 * OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are mutually
+	 * exclusive, so reserving room for the bigger one (MD2) also
+	 * covers the smaller one.
+	 */
+	size_t base_size = nla_total_size(NSH_BASE_HDR_LEN);
+	size_t md_size = nla_total_size(NSH_CTX_HDRS_MAX_LEN);
+
+	return base_size + md_size;
+}
+
 size_t ovs_key_attr_size(void)
 {
 	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
 	 * updating this function.
 	 */
-	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
+	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
 
 	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
 		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
@@ -341,6 +359,8 @@ size_t ovs_key_attr_size(void)
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
 		+ nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
 		+ nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
+		+ nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
+		  + ovs_nsh_key_attr_size()
 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
 		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
@@ -373,6 +393,13 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
 	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
 };
 
+/* Expected payload size of each nested %OVS_NSH_KEY_ATTR_* attribute. */
+static const struct ovs_len_tbl ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
+	[OVS_NSH_KEY_ATTR_BASE]     = { .len = sizeof(struct ovs_nsh_key_base) },
+	[OVS_NSH_KEY_ATTR_MD1]      = { .len = sizeof(struct ovs_nsh_key_md1) },
+	[OVS_NSH_KEY_ATTR_MD2]      = { .len = OVS_ATTR_VARIABLE },
+};
+
 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
 static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_ENCAP]	 = { .len = OVS_ATTR_NESTED },
@@ -405,6 +432,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 		.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
 	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
 		.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
+	[OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
+				     .next = ovs_nsh_key_attr_lens, },
 };
 
 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -1179,6 +1208,304 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
 	return 0;
 }
 
+/**
+ * nsh_hdr_from_nlattr - build an NSH header from a nested netlink attribute
+ * @attr: nested attribute carrying OVS_NSH_KEY_ATTR_* sub-attributes
+ * @nsh: header to fill in
+ * @size: number of bytes available at @nsh
+ *
+ * Exactly one of OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 must be
+ * present, and it must agree with the MD type in OVS_NSH_KEY_ATTR_BASE.
+ *
+ * Return: 0 on success, -EINVAL if the attributes are malformed.
+ */
+int nsh_hdr_from_nlattr(const struct nlattr *attr,
+			struct nshhdr *nsh, size_t size)
+{
+	struct nlattr *a;
+	int rem;
+	u8 flags = 0;
+	u8 ttl = 0;
+	int mdlen = 0;
+	bool has_md1 = false;
+	bool has_md2 = false;
+	u8 len;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+
+		if (type > OVS_NSH_KEY_ATTR_MAX) {
+			OVS_NLERR(1, "nsh attr %d is out of range max %d",
+				  type, OVS_NSH_KEY_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (!check_attr_len(nla_len(a),
+				    ovs_nsh_key_attr_lens[type].len)) {
+			OVS_NLERR(1,
+				  "nsh attr %d has unexpected len %d expected %d",
+				  type, nla_len(a),
+				  ovs_nsh_key_attr_lens[type].len);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_NSH_KEY_ATTR_BASE: {
+			const struct ovs_nsh_key_base *base = nla_data(a);
+
+			flags = base->flags;
+			ttl = base->ttl;
+			nsh->np = base->np;
+			nsh->mdtype = base->mdtype;
+			nsh->path_hdr = base->path_hdr;
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD1: {
+			const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+			struct nsh_md1_ctx *md1_dst = &nsh->md1;
+
+			has_md1 = true;
+			mdlen = nla_len(a);
+			if (((mdlen + NSH_BASE_HDR_LEN) != NSH_M_TYPE1_LEN) ||
+			    ((mdlen + NSH_BASE_HDR_LEN) > size) ||
+			    (mdlen <= 0)) {
+				OVS_NLERR(1, "length %d of nsh attr %d is invalid",
+					  mdlen, type);
+				return -EINVAL;
+			}
+			memcpy(md1_dst, md1, mdlen);
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD2: {
+			/* MD2 metadata is copied as raw bytes: plain u8 pointer */
+			const u8 *md2 = nla_data(a);
+			struct nsh_md2_tlv *md2_dst = &nsh->md2;
+
+			has_md2 = true;
+			mdlen = nla_len(a);
+			if (((mdlen + NSH_BASE_HDR_LEN) > size) ||
+			    (mdlen <= 0)) {
+				OVS_NLERR(1, "length %d of nsh attr %d is invalid",
+					  mdlen, type);
+				return -EINVAL;
+			}
+			memcpy(md2_dst, md2, mdlen);
+			break;
+		}
+		default:
+			OVS_NLERR(1, "Unknown nsh attribute %d",
+				  type);
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		OVS_NLERR(1, "nsh attribute has %d unknown bytes.", rem);
+		return -EINVAL;
+	}
+
+	if ((has_md1 && nsh->mdtype != NSH_M_TYPE1) ||
+	    (has_md2 && nsh->mdtype != NSH_M_TYPE2)) {
+		OVS_NLERR(1,
+			  "nsh attribute has unmatched MD type %d.",
+			  nsh->mdtype);
+		return -EINVAL;
+	}
+
+	if (unlikely(has_md1 && has_md2)) {
+		OVS_NLERR(1, "both nsh md1 and md2 attribute are there");
+		return -EINVAL;
+	}
+
+	if (unlikely(!has_md1 && !has_md2)) {
+		OVS_NLERR(1, "neither nsh md1 nor md2 attribute is there");
+		return -EINVAL;
+	}
+
+	/* nsh header length  = NSH_BASE_HDR_LEN + mdlen */
+	nsh->ver_flags_ttl_len = 0;
+	len = NSH_BASE_HDR_LEN + mdlen;
+	nsh_set_flags_ttl_len(nsh, flags, ttl, len);
+
+	return 0;
+}
+
+/* Fill the NSH flow key @nsh from the nested OVS_NSH_KEY_ATTR_* attributes
+ * in @attr.  Returns 0 on success, -ENOTSUPP if an MD type 2 attribute is
+ * present, or -EINVAL if the attributes are malformed.
+ */
+int nsh_key_from_nlattr(const struct nlattr *attr,
+			struct ovs_key_nsh *nsh)
+{
+	bool seen_md1 = false;
+	struct nlattr *nla;
+	int left;
+
+	nla_for_each_nested(nla, attr, left) {
+		int type = nla_type(nla);
+
+		if (type > OVS_NSH_KEY_ATTR_MAX) {
+			OVS_NLERR(1, "nsh attr %d is out of range max %d",
+				  type, OVS_NSH_KEY_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (!check_attr_len(nla_len(nla),
+				    ovs_nsh_key_attr_lens[type].len)) {
+			OVS_NLERR(1,
+				  "nsh attr %d has unexpected len %d expected %d",
+				  type, nla_len(nla),
+				  ovs_nsh_key_attr_lens[type].len);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_NSH_KEY_ATTR_BASE: {
+			const struct ovs_nsh_key_base *base = nla_data(nla);
+
+			/* Relies on the key starting with the base attr's layout */
+			memcpy(nsh, base, sizeof(*base));
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD1: {
+			const struct ovs_nsh_key_md1 *md1 = nla_data(nla);
+
+			seen_md1 = true;
+			memcpy(nsh->context, md1->context, sizeof(*md1));
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD2:
+			/* Not supported yet */
+			return -ENOTSUPP;
+		default:
+			OVS_NLERR(1, "Unknown nsh attribute %d",
+				  type);
+			return -EINVAL;
+		}
+	}
+
+	if (left > 0) {
+		OVS_NLERR(1, "nsh attribute has %d unknown bytes.", left);
+		return -EINVAL;
+	}
+
+	if (seen_md1 && nsh->mdtype != NSH_M_TYPE1) {
+		OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
+			  nsh->mdtype);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Validate the nested OVS_NSH_KEY_ATTR_* attributes in @attr and store the
+ * base and MD type 1 fields into @match via SW_FLOW_KEY_PUT().
+ * OVS_NSH_KEY_ATTR_MD2 is only accepted when @is_push_nsh is true, and is
+ * then only length-checked.  @log is passed to OVS_NLERR for most messages.
+ *
+ * Returns 0 on success, -ENOTSUPP for MD2 outside push_nsh, else -EINVAL.
+ */
+static int nsh_key_put_from_nlattr(const struct nlattr *attr,
+				   struct sw_flow_match *match, bool is_mask,
+				   bool is_push_nsh, bool log)
+{
+	struct nlattr *a;
+	int rem;
+	bool has_md1 = false;
+	bool has_md2 = false;
+	u8 mdtype = 0;
+	int mdlen = 0;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		int i;
+
+		if (type > OVS_NSH_KEY_ATTR_MAX) {
+			OVS_NLERR(log, "nsh attr %d is out of range max %d",
+				  type, OVS_NSH_KEY_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (!check_attr_len(nla_len(a),
+				    ovs_nsh_key_attr_lens[type].len)) {
+			OVS_NLERR(log,
+				  "nsh attr %d has unexpected len %d expected %d",
+				  type, nla_len(a),
+				  ovs_nsh_key_attr_lens[type].len);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_NSH_KEY_ATTR_BASE: {
+			const struct ovs_nsh_key_base *base = nla_data(a);
+
+			mdtype = base->mdtype;
+			SW_FLOW_KEY_PUT(match, nsh.flags,
+					base->flags, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.ttl,
+					base->ttl, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.mdtype,
+					base->mdtype, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.np,
+					base->np, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.path_hdr,
+					base->path_hdr, is_mask);
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD1: {
+			const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+
+			has_md1 = true;
+			for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
+				SW_FLOW_KEY_PUT(match, nsh.context[i],
+						md1->context[i], is_mask);
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD2:
+			if (!is_push_nsh) /* Not supported MD type 2 yet */
+				return -ENOTSUPP;
+
+			has_md2 = true;
+			mdlen = nla_len(a);
+			if ((mdlen > NSH_CTX_HDRS_MAX_LEN) ||
+			    (mdlen <= 0))
+				return -EINVAL;
+			break;
+		default:
+			OVS_NLERR(log, "Unknown nsh attribute %d",
+				  type);
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
+		return -EINVAL;
+	}
+
+	if (!is_mask) {
+		if ((has_md1 && mdtype != NSH_M_TYPE1)) {
+			OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
+				  mdtype);
+			return -EINVAL;
+		}
+	}
+
+	if (is_push_nsh && !is_mask) {
+		if ((has_md1 && mdtype != NSH_M_TYPE1) ||
+		    (has_md2 && mdtype != NSH_M_TYPE2) ||
+		    (has_md1 && has_md2) ||
+		    (!has_md1 && !has_md2)) {
+			OVS_NLERR(1,
+				  "push nsh attributes are invalid for type %d.",
+				  mdtype);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
 				u64 attrs, const struct nlattr **a,
 				bool is_mask, bool log)
@@ -1306,6 +1633,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
 		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
 	}
 
+	if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
+		if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
+					    is_mask, false, log) < 0)
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_NSH);
+	}
+
 	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
 		const struct ovs_key_mpls *mpls_key;
 
@@ -1622,6 +1956,40 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
 	return 0;
 }
 
+/* Emit @nsh into @skb as a nested OVS_KEY_ATTR_NSH attribute.  The MD1
+ * context is included for masks and for MD type 1 keys; MD type 2 is not
+ * emitted.  Returns 0 on success or -EMSGSIZE if a netlink put fails.
+ */
+static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
+			     struct sk_buff *skb)
+{
+	const bool put_md1 = is_mask || nsh->mdtype == NSH_M_TYPE1;
+	struct ovs_nsh_key_base base;
+	struct ovs_nsh_key_md1 md1;
+	struct nlattr *nest;
+
+	memcpy(&base, nsh, sizeof(base));
+	if (put_md1)
+		memcpy(md1.context, nsh->context, sizeof(md1));
+
+	nest = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(base), &base))
+		return -EMSGSIZE;
+
+	if (put_md1 &&
+	    nla_put(skb, OVS_NSH_KEY_ATTR_MD1, sizeof(md1), &md1))
+		return -EMSGSIZE;
+
+	/* Don't support MD type 2 yet */
+
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+
 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 			     const struct sw_flow_key *output, bool is_mask,
 			     struct sk_buff *skb)
@@ -1750,6 +2118,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 		ipv6_key->ipv6_tclass = output->ip.tos;
 		ipv6_key->ipv6_hlimit = output->ip.ttl;
 		ipv6_key->ipv6_frag = output->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_NSH)) {
+		if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
+			goto nla_put_failure;
 	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
 		   swkey->eth.type == htons(ETH_P_RARP)) {
 		struct ovs_key_arp *arp_key;
@@ -2242,6 +2613,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	return err;
 }
 
+/* Returns true if @attr parses cleanly as an NSH key into a scratch match. */
+static bool validate_nsh(const struct nlattr *attr, bool is_mask,
+			 bool is_push_nsh, bool log)
+{
+	struct sw_flow_key scratch_key;
+	struct sw_flow_match scratch;
+
+	ovs_match_init(&scratch, &scratch_key, true, NULL);
+
+	return !nsh_key_put_from_nlattr(attr, &scratch, is_mask,
+					is_push_nsh, log);
+}
+
 /* Return false if there are any non-masked bits set.
  * Mask follows data immediately, before any netlink padding.
  */
@@ -2384,6 +2768,11 @@ static int validate_set(const struct nlattr *a,
 
 		break;
 
+	case OVS_KEY_ATTR_NSH:
+		if (!validate_nsh(nla_data(a), masked, false, log))
+			return -EINVAL;
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -2482,6 +2871,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
 			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
 			[OVS_ACTION_ATTR_POP_ETH] = 0,
+			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
+			[OVS_ACTION_ATTR_POP_NSH] = 0,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -2636,6 +3027,19 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			mac_proto = MAC_PROTO_ETHERNET;
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_NSH:
+			mac_proto = MAC_PROTO_NONE;
+			if (!validate_nsh(nla_data(a), false, true, true))
+				return -EINVAL;
+			break;
+
+		case OVS_ACTION_ATTR_POP_NSH:
+			if (key->nsh.np == TUN_P_ETHERNET)
+				mac_proto = MAC_PROTO_ETHERNET;
+			else
+				mac_proto = MAC_PROTO_NONE;
+			break;
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 929c665..7be6750 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -79,4 +79,8 @@ int ovs_nla_put_actions(const struct nlattr *attr,
 void ovs_nla_free_flow_actions(struct sw_flow_actions *);
 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
 
+int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh);
+int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nsh_src,
+			size_t size);
+
 #endif /* flow_netlink.h */
-- 
2.5.5

^ permalink raw reply related

* Re: [PATCH][next][V2] bpf: test_maps: fix typo "conenct" -> "connect"
From: Daniel Borkmann @ 2017-08-30 13:46 UTC (permalink / raw)
  To: Colin King, Alexei Starovoitov, Shuah Khan, netdev,
	linux-kselftest
  Cc: linux-kernel
In-Reply-To: <20170830114730.28550-1-colin.king@canonical.com>

On 08/30/2017 01:47 PM, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> Trivial fix to typo in printf error message
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

For net-next; looks like there is also one in "failed to listeen\n".
Want to fix this one as well ? ;)

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply

* Re: [PATCH net-next 3/6] flow_dissector: Add protocol specific flow dissection offload
From: kbuild test robot @ 2017-08-30 13:46 UTC (permalink / raw)
  To: Tom Herbert; +Cc: kbuild-all, davem, netdev, Tom Herbert
In-Reply-To: <20170829171942.8974-4-tom@quantonium.net>

[-- Attachment #1: Type: text/plain, Size: 5484 bytes --]

Hi Tom,

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Tom-Herbert/flow_dissector-Protocol-specific-flow-dissector-offload/20170830-210709
config: x86_64-randconfig-x006-201735 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   net//ipv4/route.c: In function 'fib_multipath_hash':
>> net//ipv4/route.c:1817:4: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
       struct flow_keys keys;
       ^~~~~~

vim +1817 net//ipv4/route.c

79a13159 Peter Nørlund       2015-09-30  1791  
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1792  /* if skb is set it will be used and fl4 can be NULL */
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1793  int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1794  		       const struct sk_buff *skb)
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1795  {
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1796  	struct net *net = fi->fib_net;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1797  	struct flow_keys hash_keys;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1798  	u32 mhash;
79a13159 Peter Nørlund       2015-09-30  1799  
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1800  	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1801  	case 0:
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1802  		memset(&hash_keys, 0, sizeof(hash_keys));
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1803  		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1804  		if (skb) {
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1805  			ip_multipath_l3_keys(skb, &hash_keys);
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1806  		} else {
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1807  			hash_keys.addrs.v4addrs.src = fl4->saddr;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1808  			hash_keys.addrs.v4addrs.dst = fl4->daddr;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1809  		}
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1810  		break;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1811  	case 1:
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1812  		/* skb is currently provided only when forwarding */
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1813  		if (skb) {
19a7c5ba Tom Herbert         2017-08-29  1814  			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP |
19a7c5ba Tom Herbert         2017-08-29  1815  					    FLOW_DISSECTOR_F_STOP_AT_L4;
19a7c5ba Tom Herbert         2017-08-29  1816  ;
bf4e0a3d Nikolay Aleksandrov 2017-03-16 @1817  			struct flow_keys keys;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1818  
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1819  			/* short-circuit if we already have L4 hash present */
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1820  			if (skb->l4_hash)
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1821  				return skb_get_hash_raw(skb) >> 1;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1822  			memset(&hash_keys, 0, sizeof(hash_keys));
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1823  			skb_flow_dissect_flow_keys(skb, &keys, flag);
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1824  			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1825  			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1826  			hash_keys.ports.src = keys.ports.src;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1827  			hash_keys.ports.dst = keys.ports.dst;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1828  			hash_keys.basic.ip_proto = keys.basic.ip_proto;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1829  		} else {
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1830  			memset(&hash_keys, 0, sizeof(hash_keys));
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1831  			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1832  			hash_keys.addrs.v4addrs.src = fl4->saddr;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1833  			hash_keys.addrs.v4addrs.dst = fl4->daddr;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1834  			hash_keys.ports.src = fl4->fl4_sport;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1835  			hash_keys.ports.dst = fl4->fl4_dport;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1836  			hash_keys.basic.ip_proto = fl4->flowi4_proto;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1837  		}
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1838  		break;
79a13159 Peter Nørlund       2015-09-30  1839  	}
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1840  	mhash = flow_hash_from_keys(&hash_keys);
79a13159 Peter Nørlund       2015-09-30  1841  
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1842  	return mhash >> 1;
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1843  }
bf4e0a3d Nikolay Aleksandrov 2017-03-16  1844  EXPORT_SYMBOL_GPL(fib_multipath_hash);
79a13159 Peter Nørlund       2015-09-30  1845  #endif /* CONFIG_IP_ROUTE_MULTIPATH */
79a13159 Peter Nørlund       2015-09-30  1846  

:::::: The code at line 1817 was first introduced by commit
:::::: bf4e0a3db97eb882368fd82980b3b1fa0b5b9778 net: ipv4: add support for ECMP hash policy choice

:::::: TO: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 35395 bytes --]

^ permalink raw reply

* Re: [iproute PATCH] lib/bpf: Fix bytecode-file parsing
From: Daniel Borkmann @ 2017-08-30 13:53 UTC (permalink / raw)
  To: Phil Sutter, Stephen Hemminger; +Cc: netdev
In-Reply-To: <20170829150945.7077-1-phil@nwl.cc>

On 08/29/2017 05:09 PM, Phil Sutter wrote:
> The signedness of char type is implementation dependent, and there are
> architectures on which it is unsigned by default. In that case, the
> check whether fgetc() returned EOF failed because the return value was
> assigned an (unsigned) char variable prior to comparison with EOF (which
> is defined to -1). Fix this by using int as type for 'c' variable, which
> also matches the declaration of fgetc().
>
> While being at it, fix the parser logic to correctly handle multiple
> empty lines and consecutive whitespace and tab characters to further
> improve the parser's robustness. Note that this will still detect double
> separator characters, so doesn't soften up the parser too much.
>
> Fixes: 3da3ebfca85b8 ("bpf: Make bytecode-file reading a little more robust")
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Signed-off-by: Phil Sutter <phil@nwl.cc>

Definitely ack on the EOF bug:

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

[...]
> @@ -228,18 +229,20 @@ static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
>   			case '\n':
>   				if (c_prev != ',')
>   					*(pos++) = ',';
> +				c_prev = ',';
>   				break;
>   			case ' ':
>   			case '\t':
>   				if (c_prev != ' ')
>   					*(pos++) = c;
> +				c_prev = ' ';
>   				break;
>   			default:
>   				*(pos++) = c;
> +				c_prev = c;
>   			}
>   			if (pos - tmp_string == tmp_len)
>   				break;
> -			c_prev = c;

I don't really have a strong opinion on this, but the logic for
normalizing here is getting a bit convoluted. Is your use case
for making the parser more robust mainly so you can just use the
-ddd output from tcpdump for cBPF w/o piping through tr? But even
that shouldn't give multiple empty lines afaik, no?

^ permalink raw reply

* Re: [PATCH] RDS: constify rhashtable_params
From: santosh.shilimkar @ 2017-08-30 13:54 UTC (permalink / raw)
  To: Arvind Yadav, davem; +Cc: linux-kernel, rds-devel, linux-rdma, netdev
In-Reply-To: <3cdaecc7c8080b0f2172d8788b96303528957b08.1504093495.git.arvind.yadav.cs@gmail.com>

On 8/30/17 4:49 AM, Arvind Yadav wrote:
> rhashtable_params are not supposed to change at runtime. All
> Functions rhashtable_* working with const rhashtable_params
> provided by <linux/rhashtable.h>. So mark the non-const structs
> as const.
> 
> Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
> ---
This is already addressed in net-next by [1]

Regards,
Santosh

[1] https://lkml.org/lkml/2017/8/25/482

^ permalink raw reply

* [PATCH] rtlwifi: btcoex: 23b 1ant: fix duplicated code for different branches
From: Gustavo A. R. Silva @ 2017-08-30 13:42 UTC (permalink / raw)
  To: Larry Finger, Chaoming Li, Kalle Valo
  Cc: linux-wireless, netdev, linux-kernel, Gustavo A. R. Silva

Refactor code in order to avoid identical code for different branches.

This issue was detected with the help of Coccinelle.

Addresses-Coverity-ID: 1226788
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
---
This issue was reported by Coverity and it was tested by compilation only.
I'm suspicious this may be a copy/paste error. Please, verify.

 .../net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c   | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
index c044252..960ce80f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
@@ -2260,14 +2260,8 @@ static void halbtc8723b1ant_run_coexist_mechanism(struct btc_coexist *btcoexist)
 
 		if (iot_peer != BTC_IOT_PEER_CISCO &&
 		    iot_peer != BTC_IOT_PEER_BROADCOM) {
-			if (bt_link_info->sco_exist)
-				halbtc8723b1ant_limited_rx(btcoexist,
-							   NORMAL_EXEC, false,
-							   false, 0x5);
-			else
-				halbtc8723b1ant_limited_rx(btcoexist,
-							   NORMAL_EXEC, false,
-							   false, 0x5);
+			halbtc8723b1ant_limited_rx(btcoexist, NORMAL_EXEC,
+						   false, false, 0x5);
 		} else {
 			if (bt_link_info->sco_exist) {
 				halbtc8723b1ant_limited_rx(btcoexist,
-- 
2.5.0

^ permalink raw reply related

* Re: [PATCH net-next 3/6] flow_dissector: Add protocol specific flow dissection offload
From: kbuild test robot @ 2017-08-30 14:10 UTC (permalink / raw)
  To: Tom Herbert; +Cc: kbuild-all, davem, netdev, Tom Herbert
In-Reply-To: <20170829171942.8974-4-tom@quantonium.net>

[-- Attachment #1: Type: text/plain, Size: 795 bytes --]

Hi Tom,

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Tom-Herbert/flow_dissector-Protocol-specific-flow-dissector-offload/20170830-210709
config: i386-randconfig-a0-201735 (attached as .config)
compiler: gcc-5 (Debian 5.4.1-2) 5.4.1 20160904
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   net/core/flow_dissector.o: In function `__skb_flow_dissect':
>> flow_dissector.c:(.text+0xcb3): undefined reference to `inet6_offloads'
>> flow_dissector.c:(.text+0xcc0): undefined reference to `inet_offloads'

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 20768 bytes --]

^ permalink raw reply

* Re: [iproute PATCH] lib/bpf: Fix bytecode-file parsing
From: Phil Sutter @ 2017-08-30 14:11 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Stephen Hemminger, netdev
In-Reply-To: <59A6C377.90705@iogearbox.net>

Hi Daniel,

On Wed, Aug 30, 2017 at 03:53:59PM +0200, Daniel Borkmann wrote:
> On 08/29/2017 05:09 PM, Phil Sutter wrote:
[...]
> > @@ -228,18 +229,20 @@ static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
> >   			case '\n':
> >   				if (c_prev != ',')
> >   					*(pos++) = ',';
> > +				c_prev = ',';
> >   				break;
> >   			case ' ':
> >   			case '\t':
> >   				if (c_prev != ' ')
> >   					*(pos++) = c;
> > +				c_prev = ' ';
> >   				break;
> >   			default:
> >   				*(pos++) = c;
> > +				c_prev = c;
> >   			}
> >   			if (pos - tmp_string == tmp_len)
> >   				break;
> > -			c_prev = c;
> 
> I don't really have a strong opinion on this, but the logic for
> normalizing here is getting a bit convoluted. Is your use case
> for making the parser more robust mainly so you can just use the
> -ddd output from tcpdump for cBPF w/o piping through tr? But even
> that shouldn't give multiple empty lines afaik, no?

Well, using tcpdump output was functional before already. I just noticed
that if I add an empty line to the end of bytecode-file, it will fail
and I didn't like that. Then while searching for the EOF issue, I
noticed that the parser logic above is a bit faulty in that it will
treat different characters equally but doesn't make sure c_prev will be
assigned only one of them. So apart from the added robustness, it really
fixes an inconsistency in the parsing logic.

Cheers, Phil

^ permalink raw reply

* Re: [PATCH net-next 3/3 v11] drivers: net: ethernet: qualcomm: rmnet: Initial implementation
From: Dan Williams @ 2017-08-30 14:39 UTC (permalink / raw)
  To: Subash Abhinov Kasiviswanathan, netdev, davem, fengguang.wu, jiri,
	stephen, David.Laight, marcel, andrew
In-Reply-To: <1504068258-16982-4-git-send-email-subashab@codeaurora.org>

On Tue, 2017-08-29 at 22:44 -0600, Subash Abhinov Kasiviswanathan
wrote:
> RmNet driver provides a transport agnostic MAP (multiplexing and
> aggregation protocol) support in embedded module. Module provides
> virtual network devices which can be attached to any IP-mode
> physical device. This will be used to provide all MAP functionality
> on future hardware in a single consistent location.

General comment; other drivers that do similar things (macvlan, ipvlan)
use the term "port" to refer to what I think you're calling a
"rmnet_real_dev_info".  Maybe that's a shorter or less confusing term. 
Could be renamed later too, if you wanted to do so.

> Signed-off-by: Subash Abhinov Kasiviswanathan
> <subashab@codeaurora.org>
> ---
>  Documentation/networking/rmnet.txt                 |  82 ++++
>  drivers/net/ethernet/qualcomm/Kconfig              |   2 +
>  drivers/net/ethernet/qualcomm/Makefile             |   2 +
>  drivers/net/ethernet/qualcomm/rmnet/Kconfig        |  12 +
>  drivers/net/ethernet/qualcomm/rmnet/Makefile       |  10 +
>  drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 419
> +++++++++++++++++++++
>  drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h |  56 +++
>  .../net/ethernet/qualcomm/rmnet/rmnet_handlers.c   | 271
> +++++++++++++
>  .../net/ethernet/qualcomm/rmnet/rmnet_handlers.h   |  26 ++
>  drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h    |  88 +++++
>  .../ethernet/qualcomm/rmnet/rmnet_map_command.c    | 107 ++++++
>  .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c   | 105 ++++++
>  .../net/ethernet/qualcomm/rmnet/rmnet_private.h    |  45 +++
>  drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c    | 170 +++++++++
>  drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h    |  29 ++
>  15 files changed, 1424 insertions(+)
>  create mode 100644 Documentation/networking/rmnet.txt
>  create mode 100644 drivers/net/ethernet/qualcomm/rmnet/Kconfig
>  create mode 100644 drivers/net/ethernet/qualcomm/rmnet/Makefile
>  create mode 100644
> drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
>  create mode 100644
> drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
>  create mode 100644
> drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
>  create mode 100644
> drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
>  create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
>  create mode 100644
> drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
>  create mode 100644
> drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
>  create mode 100644
> drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
>  create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
>  create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
> 
> diff --git a/Documentation/networking/rmnet.txt
> b/Documentation/networking/rmnet.txt
> new file mode 100644
> index 0000000..6b341ea
> --- /dev/null
> +++ b/Documentation/networking/rmnet.txt
> @@ -0,0 +1,82 @@
> +1. Introduction
> +
> +rmnet driver is used for supporting the Multiplexing and aggregation
> +Protocol (MAP). This protocol is used by all recent chipsets using
> Qualcomm
> +Technologies, Inc. modems.
> +
> +This driver can be used to register onto any physical network device
> in
> +IP mode. Physical transports include USB, HSIC, PCIe and IP
> accelerator.
> +
> +Multiplexing allows for creation of logical netdevices (rmnet
> devices) to
> +handle multiple private data networks (PDN) like a default internet,
> tethering,
> +multimedia messaging service (MMS) or IP media subsystem (IMS).
> Hardware sends
> +packets with MAP headers to rmnet. Based on the multiplexer id,
> rmnet
> +routes to the appropriate PDN after removing the MAP header.
> +
> +Aggregation is required to achieve high data rates. This involves
> hardware
> +sending aggregated bunch of MAP frames. rmnet driver will de-
> aggregate
> +these MAP frames and send them to appropriate PDN's.
> +
> +2. Packet format
> +
> +a. MAP packet (data / control)
> +
> +MAP header has the same endianness of the IP packet.
> +
> +Packet format -
> +
> +Bit             0             1           2-7      8 -
> 15           16 - 31
> +Function   Command / Data   Reserved     Pad   Multiplexer
> ID    Payload length
> +Bit            32 - x
> +Function     Raw  Bytes
> +
> +Command (1)/ Data (0) bit value is to indicate if the packet is a
> MAP command
> +or data packet. Control packet is used for transport level flow
> control. Data
> +packets are standard IP packets.
> +
> +Reserved bits are usually zeroed out and to be ignored by receiver.
> +
> +Padding is number of bytes to be added for 4 byte alignment if
> required by
> +hardware.
> +
> +Multiplexer ID is to indicate the PDN on which data has to be sent.
> +
> +Payload length includes the padding length but does not include MAP
> header
> +length.
> +
> +b. MAP packet (command specific)
> +
> +Bit             0             1           2-7      8 -
> 15           16 - 31
> +Function   Command         Reserved     Pad   Multiplexer
> ID    Payload length
> +Bit          32 - 39        40 - 45    46 - 47       48 - 63
> +Function   Command name    Reserved   Command Type   Reserved
> +Bit          64 - 95
> +Function   Transaction ID
> +Bit          96 - 127
> +Function   Command data
> +
> +Command 1 indicates disabling flow while 2 is enabling flow
> +
> +Command types -
> +0 for MAP command request
> +1 is to acknowledge the receipt of a command
> +2 is for unsupported commands
> +3 is for error during processing of commands
> +
> +c. Aggregation
> +
> +Aggregation is multiple MAP packets (can be data or command)
> delivered to
> +rmnet in a single linear skb. rmnet will process the individual
> +packets and either ACK the MAP command or deliver the IP packet to
> the
> +network stack as needed
> +
> +MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional
> padding....
> +MAP header|IP Packet|Optional padding|MAP header|Command
> Packet|Optional pad...
> +
> +3. Userspace configuration
> +
> +rmnet userspace configuration is done through netlink library
> librmnetctl
> +and command line utility rmnetcli. Utility is hosted in codeaurora
> forum git.
> +The driver uses rtnl_link_ops for communication.
> +
> +https://source.codeaurora.org/quic/la/platform/vendor/qcom-opensourc
> e/dataservices/tree/rmnetctl
> diff --git a/drivers/net/ethernet/qualcomm/Kconfig
> b/drivers/net/ethernet/qualcomm/Kconfig
> index 877675a..f520071 100644
> --- a/drivers/net/ethernet/qualcomm/Kconfig
> +++ b/drivers/net/ethernet/qualcomm/Kconfig
> @@ -59,4 +59,6 @@ config QCOM_EMAC
>  	  low power, Receive-Side Scaling (RSS), and IEEE 1588-2008
>  	  Precision Clock Synchronization Protocol.
>  
> +source "drivers/net/ethernet/qualcomm/rmnet/Kconfig"
> +
>  endif # NET_VENDOR_QUALCOMM
> diff --git a/drivers/net/ethernet/qualcomm/Makefile
> b/drivers/net/ethernet/qualcomm/Makefile
> index 92fa7c4..1847350 100644
> --- a/drivers/net/ethernet/qualcomm/Makefile
> +++ b/drivers/net/ethernet/qualcomm/Makefile
> @@ -9,3 +9,5 @@ obj-$(CONFIG_QCA7000_UART) += qcauart.o
>  qcauart-objs := qca_uart.o
>  
>  obj-y += emac/
> +
> +obj-$(CONFIG_RMNET) += rmnet/
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/Kconfig
> b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
> new file mode 100644
> index 0000000..6e2587a
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
> @@ -0,0 +1,12 @@
> +#
> +# RMNET MAP driver
> +#
> +
> +menuconfig RMNET
> +	tristate "RmNet MAP driver"
> +	default n
> +	---help---
> +	  If you select this, you will enable the RMNET module which
> is used
> +	  for handling data in the multiplexing and aggregation
> protocol (MAP)
> +	  format in the embedded data path. RMNET devices can be
> attached to
> +	  any IP mode physical device.
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/Makefile
> b/drivers/net/ethernet/qualcomm/rmnet/Makefile
> new file mode 100644
> index 0000000..01bddf2
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/Makefile
> @@ -0,0 +1,10 @@
> +#
> +# Makefile for the RMNET module
> +#
> +
> +rmnet-y		 := rmnet_config.o
> +rmnet-y		 += rmnet_vnd.o
> +rmnet-y		 += rmnet_handlers.o
> +rmnet-y		 += rmnet_map_data.o
> +rmnet-y		 += rmnet_map_command.o
> +obj-$(CONFIG_RMNET) += rmnet.o
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
> new file mode 100644
> index 0000000..e836d26
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
> @@ -0,0 +1,419 @@
> +/* Copyright (c) 2013-2017, The Linux Foundation. All rights
> reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * RMNET configuration engine
> + *
> + */
> +
> +#include <net/sock.h>
> +#include <linux/module.h>
> +#include <linux/netlink.h>
> +#include <linux/netdevice.h>
> +#include "rmnet_config.h"
> +#include "rmnet_handlers.h"
> +#include "rmnet_vnd.h"
> +#include "rmnet_private.h"
> +
> +/* Locking scheme -
> + * The shared resource which needs to be protected is realdev-
> >rx_handler_data.
> + * For the writer path, this is using rtnl_lock(). The writer paths
> are
> + * rmnet_newlink(), rmnet_dellink() and
> rmnet_force_unassociate_device(). These
> + * paths are already called with rtnl_lock() acquired in. There is
> also an
> + * ASSERT_RTNL() to ensure that we are calling with rtnl acquired.
> For
> + * dereference here, we will need to use rtnl_dereference(). Dev
> list writing
> + * needs to happen with rtnl_lock() acquired for
> netdev_master_upper_dev_link().
> + * For the reader path, the real_dev->rx_handler_data is called in
> the TX / RX
> + * path. We only need rcu_read_lock() for these scenarios. In these
> cases,
> + * the rcu_read_lock() is held in __dev_queue_xmit() and
> + * netif_receive_skb_internal(), so readers need to use
> rcu_dereference_rtnl()
> + * to get the relevant information. For dev list reading, we again
> acquire
> + * rcu_read_lock() in rmnet_dellink() for
> netdev_master_upper_dev_get_rcu().
> + * We also use unregister_netdevice_many() to free all rmnet devices
> in
> + * rmnet_force_unassociate_device() so we dont lose the rtnl_lock()
> and free in
> + * same context.
> + */
> +
> +/* Local Definitions and Declarations */
> +#define RMNET_LOCAL_LOGICAL_ENDPOINT -1
> +
> +struct rmnet_walk_data {
> +	struct net_device *real_dev;
> +	struct list_head *head;
> +	struct rmnet_real_dev_info *real_dev_info;
> +};
> +
> +static int rmnet_is_real_dev_registered(const struct net_device
> *real_dev)
> +{
> +	rx_handler_func_t *rx_handler;
> +
> +	rx_handler = rcu_dereference(real_dev->rx_handler);
> +	return (rx_handler == rmnet_rx_handler);
> +}
> +
> +/* Needs either rcu_read_lock() or rtnl lock */
> +static struct rmnet_real_dev_info*
> +__rmnet_get_real_dev_info(const struct net_device *real_dev)
> +{
> +	if (rmnet_is_real_dev_registered(real_dev))
> +		return rcu_dereference_rtnl(real_dev-
> >rx_handler_data);
> +	else
> +		return NULL;
> +}
> +
> +/* Needs rtnl lock */
> +static struct rmnet_real_dev_info*
> +rmnet_get_real_dev_info_rtnl(const struct net_device *real_dev)
> +{
> +	return rtnl_dereference(real_dev->rx_handler_data);
> +}
> +
> +static struct rmnet_endpoint*
> +rmnet_get_endpoint(struct net_device *dev, int config_id)
> +{
> +	struct rmnet_real_dev_info *r;
> +	struct rmnet_endpoint *ep;
> +
> +	if (!rmnet_is_real_dev_registered(dev)) {
> +		ep = rmnet_vnd_get_endpoint(dev);
> +	} else {
> +		r = __rmnet_get_real_dev_info(dev);
> +
> +		if (!r)
> +			return NULL;
> +
> +		if (config_id == RMNET_LOCAL_LOGICAL_ENDPOINT)
> +			ep = &r->local_ep;
> +		else
> +			ep = &r->muxed_ep[config_id];
> +	}
> +
> +	return ep;
> +}
> +
> +static int rmnet_unregister_real_device(struct net_device *real_dev,
> +					struct rmnet_real_dev_info
> *r)
> +{
> +	if (r->nr_rmnet_devs)
> +		return -EINVAL;
> +
> +	kfree(r);
> +
> +	netdev_rx_handler_unregister(real_dev);
> +
> +	/* release reference on real_dev */
> +	dev_put(real_dev);
> +
> +	netdev_dbg(real_dev, "Removed from rmnet\n");
> +	return 0;
> +}
> +
> +static int rmnet_register_real_device(struct net_device *real_dev)
> +{
> +	struct rmnet_real_dev_info *r;
> +	int rc;
> +
> +	ASSERT_RTNL();
> +
> +	if (rmnet_is_real_dev_registered(real_dev))
> +		return 0;
> +
> +	r = kzalloc(sizeof(*r), GFP_ATOMIC);
> +	if (!r)
> +		return -ENOMEM;
> +
> +	r->dev = real_dev;
> +	rc = netdev_rx_handler_register(real_dev, rmnet_rx_handler,
> r);
> +	if (rc) {
> +		kfree(r);
> +		return -EBUSY;
> +	}
> +
> +	/* hold on to real dev for MAP data */
> +	dev_hold(real_dev);
> +
> +	netdev_dbg(real_dev, "registered with rmnet\n");
> +	return 0;
> +}
> +
> +static int rmnet_set_ingress_data_format(struct net_device *dev, u32
> idf)
> +{
> +	struct rmnet_real_dev_info *r;
> +
> +	netdev_dbg(dev, "Ingress format 0x%08X\n", idf);
> +
> +	r = __rmnet_get_real_dev_info(dev);
> +
> +	r->ingress_data_format = idf;
> +
> +	return 0;
> +}
> +
> +static int rmnet_set_egress_data_format(struct net_device *dev, u32
> edf,
> +					u16 agg_size, u16 agg_count)
> +{
> +	struct rmnet_real_dev_info *r;
> +
> +	netdev_dbg(dev, "Egress format 0x%08X agg size %d cnt %d\n",
> +		   edf, agg_size, agg_count);
> +
> +	r = __rmnet_get_real_dev_info(dev);
> +
> +	r->egress_data_format = edf;
> +
> +	return 0;
> +}
> +
> +static int __rmnet_set_endpoint_config(struct net_device *dev, int
> config_id,
> +				       struct rmnet_endpoint *ep)
> +{
> +	struct rmnet_endpoint *dev_ep;
> +
> +	dev_ep = rmnet_get_endpoint(dev, config_id);
> +
> +	if (!dev_ep)
> +		return -EINVAL;
> +
> +	memcpy(dev_ep, ep, sizeof(struct rmnet_endpoint));
> +	if (config_id == RMNET_LOCAL_LOGICAL_ENDPOINT)

Maybe this got elided during the revisions, but now I can't find
anywhere that passes RMNET_LOCAL_LOGICAL_ENDPOINT as the config_id.
Looking at the callchain, there are two places that
RMNET_LOCAL_LOGICAL_ENDPOINT matters:

rmnet_get_endpoint(): only ever called by __rmnet_set_endpoint_config()

__rmnet_set_endpoint_config(): only called from
rmnet_set_endpoint_config(); which itself is only called from
rmnet_newlink().

So the only place that 'config_id' is set, and thus that it could be
LOCAL_LOGICAL_ENDPOINT, is rmnet_newlink() via 'mux_id'.  But
IFLA_VLAN_ID is a u16, and so I don't see anywhere that
config_id/mux_id will ever be < 0, and thus anywhere that it could be
LOCAL_LOGICAL_ENDPOINT.

I could well just not be seeing it though...

> +		dev_ep->mux_id = 0;
> +	else
> +		dev_ep->mux_id = config_id;
> +
> +	return 0;
> +}

This function (__rmnet_set_endpoint_config) seems to only be called
from rmnet_set_endpoint_config().  Perhaps just combine them?

But that brings up another point; can the rmnet "mode" or egress_dev
change at runtime, after the rmnet child has been created?  I forget if
that was possible with your original patchset that used ioctls.

> +static int rmnet_set_endpoint_config(struct net_device *dev,
> +				     int config_id, u8 rmnet_mode,
> +				     struct net_device *egress_dev)
> +{
> +	struct rmnet_endpoint ep;
> +
> +	netdev_dbg(dev, "id %d mode %d dev %s\n",
> +		   config_id, rmnet_mode, egress_dev->name);
> +
> +	if (config_id < RMNET_LOCAL_LOGICAL_ENDPOINT ||
> +	    config_id >= RMNET_MAX_LOGICAL_EP)
> +		return -EINVAL;
> +
> +	/* This config is cleared on every set, so its ok to not
> +	 * clear it on a device delete.
> +	 */
> +	memset(&ep, 0, sizeof(struct rmnet_endpoint));
> +	ep.rmnet_mode = rmnet_mode;
> +	ep.egress_dev = egress_dev;
> +
> +	return __rmnet_set_endpoint_config(dev, config_id, &ep);
> +}
> +
> +static int rmnet_newlink(struct net *src_net, struct net_device
> *dev,
> +			 struct nlattr *tb[], struct nlattr *data[],
> +			 struct netlink_ext_ack *extack)
> +{
> +	int ingress_format = RMNET_INGRESS_FORMAT_DEMUXING |
> +			     RMNET_INGRESS_FORMAT_DEAGGREGATION |
> +			     RMNET_INGRESS_FORMAT_MAP;
> +	int egress_format = RMNET_EGRESS_FORMAT_MUXING |
> +			    RMNET_EGRESS_FORMAT_MAP;
> +	struct rmnet_real_dev_info *r;
> +	struct net_device *real_dev;
> +	int mode = RMNET_EPMODE_VND;
> +	int err = 0;
> +	u16 mux_id;
> +
> +	real_dev = __dev_get_by_index(src_net,
> nla_get_u32(tb[IFLA_LINK]));
> +	if (!real_dev || !dev)
> +		return -ENODEV;
> +
> +	if (!data[IFLA_VLAN_ID])
> +		return -EINVAL;
> +
> +	mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
> +
> +	err = rmnet_register_real_device(real_dev);
> +	if (err)
> +		goto err0;
> +
> +	r = rmnet_get_real_dev_info_rtnl(real_dev);
> +	err = rmnet_vnd_newlink(mux_id, dev, r);
> +	if (err)
> +		goto err1;
> +
> +	err = netdev_master_upper_dev_link(dev, real_dev, NULL,
> NULL);
> +	if (err)
> +		goto err2;
> +
> +	rmnet_vnd_set_mux(dev, mux_id);

Why not set the mux_id in rmnet_vnd_newlink()?

Also, bigger problem.  r->rmnet_devices[] is only 32 items in size
(RMNET_MAX_VND).  But mux_id (which is used as an index into
rmnet_devices in a few places) can be up to 254
(RMNET_MAX_LOGICAL_EP - 1).

So if you try to create an rmnet for mux ID 32, you panic the kernel. 
See below my comments about rmnet_real_dev_info...

> +	rmnet_set_egress_data_format(real_dev, egress_format, 0, 0);
> +	rmnet_set_ingress_data_format(real_dev, ingress_format);

I can't see anywhere that the egress/ingress data formats get set
except for this function, so perhaps you could just skip these
functions and (since you already have 'r' from above) set
r->[egress|ingress]_data_format directly?

> +	rmnet_set_endpoint_config(real_dev, mux_id, mode, dev);
> +	rmnet_set_endpoint_config(dev, mux_id, mode, real_dev);
> +	return 0;
> +
> +err2:
> +	rmnet_vnd_dellink(mux_id, r);
> +err1:
> +	rmnet_unregister_real_device(real_dev, r);
> +err0:
> +	return err;
> +}
> +
> +static void rmnet_dellink(struct net_device *dev, struct list_head
> *head)
> +{
> +	struct rmnet_real_dev_info *r;
> +	struct net_device *real_dev;
> +	u8 mux_id;
> +
> +	rcu_read_lock();
> +	real_dev = netdev_master_upper_dev_get_rcu(dev);
> +	rcu_read_unlock();
> +
> +	if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
> +		return;
> +
> +	r = rmnet_get_real_dev_info_rtnl(real_dev);
> +
> +	mux_id = rmnet_vnd_get_mux(dev);
> +	rmnet_vnd_dellink(mux_id, r);
> +	netdev_upper_dev_unlink(dev, real_dev);
> +	rmnet_unregister_real_device(real_dev, r);
> +
> +	unregister_netdevice_queue(dev, head);
> +}
> +
> +static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void
> *data)
> +{
> +	struct rmnet_walk_data *d = data;
> +	u8 mux_id;
> +
> +	mux_id = rmnet_vnd_get_mux(rmnet_dev);
> +
> +	rmnet_vnd_dellink(mux_id, d->real_dev_info);
> +	netdev_upper_dev_unlink(rmnet_dev, d->real_dev);
> +	unregister_netdevice_queue(rmnet_dev, d->head);
> +
> +	return 0;
> +}
> +
> +static void rmnet_force_unassociate_device(struct net_device *dev)
> +{
> +	struct net_device *real_dev = dev;
> +	struct rmnet_real_dev_info *r;
> +	struct rmnet_walk_data d;
> +	LIST_HEAD(list);
> +
> +	if (!rmnet_is_real_dev_registered(real_dev))
> +		return;
> +
> +	ASSERT_RTNL();
> +
> +	d.real_dev = real_dev;
> +	d.head = &list;
> +
> +	r = rmnet_get_real_dev_info_rtnl(dev);
> +	d.real_dev_info = r;
> +
> +	rcu_read_lock();
> +	netdev_walk_all_lower_dev_rcu(real_dev,
> rmnet_dev_walk_unreg, &d);
> +	rcu_read_unlock();
> +	unregister_netdevice_many(&list);
> +
> +	rmnet_unregister_real_device(real_dev, r);
> +}
> +
> +static int rmnet_config_notify_cb(struct notifier_block *nb,
> +				  unsigned long event, void *data)
> +{
> +	struct net_device *dev = netdev_notifier_info_to_dev(data);
> +
> +	if (!dev)
> +		return NOTIFY_DONE;
> +
> +	switch (event) {
> +	case NETDEV_UNREGISTER:
> +		netdev_dbg(dev, "Kernel unregister\n");
> +		rmnet_force_unassociate_device(dev);
> +		break;
> +
> +	default:
> +		break;
> +	}
> +
> +	return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block rmnet_dev_notifier __read_mostly = {
> +	.notifier_call = rmnet_config_notify_cb,
> +};
> +
> +static int rmnet_rtnl_validate(struct nlattr *tb[], struct nlattr
> *data[],
> +			       struct netlink_ext_ack *extack)
> +{
> +	u16 mux_id;
> +
> +	if (!data || !data[IFLA_VLAN_ID])
> +		return -EINVAL;
> +
> +	mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
> +	if (mux_id > (RMNET_MAX_LOGICAL_EP - 1))
> +		return -ERANGE;
> +
> +	return 0;
> +}
> +
> +static size_t rmnet_get_size(const struct net_device *dev)
> +{
> +	return nla_total_size(2); /* IFLA_VLAN_ID */
> +}
> +
> +struct rtnl_link_ops rmnet_link_ops __read_mostly = {
> +	.kind		= "rmnet",
> +	.maxtype	= __IFLA_VLAN_MAX,
> +	.priv_size	= sizeof(struct rmnet_priv),
> +	.setup		= rmnet_vnd_setup,
> +	.validate	= rmnet_rtnl_validate,
> +	.newlink	= rmnet_newlink,
> +	.dellink	= rmnet_dellink,
> +	.get_size	= rmnet_get_size,
> +};
> +
> +struct rmnet_real_dev_info*
> +rmnet_get_real_dev_info(struct net_device *real_dev)
> +{
> +	return __rmnet_get_real_dev_info(real_dev);
> +}
> +
> +/* Startup/Shutdown */
> +
> +static int __init rmnet_init(void)
> +{
> +	int rc;
> +
> +	rc = register_netdevice_notifier(&rmnet_dev_notifier);
> +	if (rc != 0)
> +		return rc;
> +
> +	rc = rtnl_link_register(&rmnet_link_ops);
> +	if (rc != 0) {
> +		unregister_netdevice_notifier(&rmnet_dev_notifier);
> +		return rc;
> +	}
> +	return rc;
> +}
> +
> +static void __exit rmnet_exit(void)
> +{
> +	unregister_netdevice_notifier(&rmnet_dev_notifier);
> +	rtnl_link_unregister(&rmnet_link_ops);
> +}
> +
> +module_init(rmnet_init)
> +module_exit(rmnet_exit)
> +MODULE_LICENSE("GPL v2");
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
> new file mode 100644
> index 0000000..985d372
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
> @@ -0,0 +1,56 @@
> +/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All
> rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * RMNET Data configuration engine
> + *
> + */
> +
> +#include <linux/skbuff.h>
> +
> +#ifndef _RMNET_CONFIG_H_
> +#define _RMNET_CONFIG_H_
> +
> +#define RMNET_MAX_LOGICAL_EP 255
> +#define RMNET_MAX_VND        32
> +
> +/* Information about the next device to deliver the packet to.
> + * Exact usage of this parameter depends on the rmnet_mode.
> + */
> +struct rmnet_endpoint {
> +	u8 rmnet_mode;
> +	u8 mux_id;
> +	struct net_device *egress_dev;
> +};
> +
> +/* One instance of this structure is instantiated for each real_dev
> associated
> + * with rmnet.
> + */
> +struct rmnet_real_dev_info {
> +	struct net_device *dev;
> +	struct rmnet_endpoint local_ep;
> +	struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP];

This means that the first time you add an rmnet dev to a netdev, it'll
create a structure that's quite large (at least 255 * 6, but more due
to padding), when in most cases few of these items will be used.  Most
of the time you'd have only a couple PDNs active, but this will
allocate memory for MAX_LOGICAL_EP of them, no?

ipvlan uses a list to track the child devices attached to a physical
device so that it doesn't have to allocate them all at once and waste
memory; that technique could replace the 'rmnet_devices' member below.

It also uses a hash to find the actual ipvlan upperdev from the
rx_handler of the lowerdev, which is probably what would replace
muxed_ep[] here.

Is the relationship between rmnet "child"/upper devs and mux_ids 1:1? 
Or can you have multiple rmnet devs for the same mux_id?

Dan

> +	u32 ingress_data_format;
> +	u32 egress_data_format;
> +	struct net_device *rmnet_devices[RMNET_MAX_VND];
> +	u8 nr_rmnet_devs;
> +};
> +
> +extern struct rtnl_link_ops rmnet_link_ops;
> +
> +struct rmnet_priv {
> +	struct rmnet_endpoint local_ep;
> +	u8 mux_id;
> +};
> +
> +struct rmnet_real_dev_info*
> +rmnet_get_real_dev_info(struct net_device *real_dev);
> +
> +#endif /* _RMNET_CONFIG_H_ */
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> new file mode 100644
> index 0000000..7dab3bb
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> @@ -0,0 +1,271 @@
> +/* Copyright (c) 2013-2017, The Linux Foundation. All rights
> reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * RMNET Data ingress/egress handler
> + *
> + */
> +
> +#include <linux/netdevice.h>
> +#include <linux/netdev_features.h>
> +#include "rmnet_private.h"
> +#include "rmnet_config.h"
> +#include "rmnet_vnd.h"
> +#include "rmnet_map.h"
> +#include "rmnet_handlers.h"
> +
> +#define RMNET_IP_VERSION_4 0x40
> +#define RMNET_IP_VERSION_6 0x60
> +
> +/* Helper Functions */
> +
> +static void rmnet_set_skb_proto(struct sk_buff *skb)
> +{
> +	switch (skb->data[0] & 0xF0) {
> +	case RMNET_IP_VERSION_4:
> +		skb->protocol = htons(ETH_P_IP);
> +		break;
> +	case RMNET_IP_VERSION_6:
> +		skb->protocol = htons(ETH_P_IPV6);
> +		break;
> +	default:
> +		skb->protocol = htons(ETH_P_MAP);
> +		break;
> +	}
> +}
> +
> +/* Generic handler */
> +
> +static rx_handler_result_t
> +rmnet_bridge_handler(struct sk_buff *skb, struct rmnet_endpoint *ep)
> +{
> +	if (!ep->egress_dev)
> +		kfree_skb(skb);
> +	else
> +		rmnet_egress_handler(skb, ep);
> +
> +	return RX_HANDLER_CONSUMED;
> +}
> +
> +static rx_handler_result_t
> +rmnet_deliver_skb(struct sk_buff *skb, struct rmnet_endpoint *ep)
> +{
> +	switch (ep->rmnet_mode) {
> +	case RMNET_EPMODE_NONE:
> +		return RX_HANDLER_PASS;
> +
> +	case RMNET_EPMODE_BRIDGE:
> +		return rmnet_bridge_handler(skb, ep);
> +
> +	case RMNET_EPMODE_VND:
> +		skb_reset_transport_header(skb);
> +		skb_reset_network_header(skb);
> +		rmnet_vnd_rx_fixup(skb, skb->dev);
> +
> +		skb->pkt_type = PACKET_HOST;
> +		skb_set_mac_header(skb, 0);
> +		netif_receive_skb(skb);
> +		return RX_HANDLER_CONSUMED;
> +
> +	default:
> +		kfree_skb(skb);
> +		return RX_HANDLER_CONSUMED;
> +	}
> +}
> +
> +static rx_handler_result_t
> +rmnet_ingress_deliver_packet(struct sk_buff *skb,
> +			     struct rmnet_real_dev_info *r)
> +{
> +	if (!r) {
> +		kfree_skb(skb);
> +		return RX_HANDLER_CONSUMED;
> +	}
> +
> +	skb->dev = r->local_ep.egress_dev;
> +
> +	return rmnet_deliver_skb(skb, &r->local_ep);
> +}
> +
> +/* MAP handler */
> +
> +static rx_handler_result_t
> +__rmnet_map_ingress_handler(struct sk_buff *skb,
> +			    struct rmnet_real_dev_info *r)
> +{
> +	struct rmnet_endpoint *ep;
> +	u8 mux_id;
> +	u16 len;
> +
> +	if (RMNET_MAP_GET_CD_BIT(skb)) {
> +		if (r->ingress_data_format
> +		    & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
> +			return rmnet_map_command(skb, r);
> +
> +		kfree_skb(skb);
> +		return RX_HANDLER_CONSUMED;
> +	}
> +
> +	mux_id = RMNET_MAP_GET_MUX_ID(skb);
> +	len = RMNET_MAP_GET_LENGTH(skb) - RMNET_MAP_GET_PAD(skb);
> +
> +	if (mux_id >= RMNET_MAX_LOGICAL_EP) {
> +		kfree_skb(skb);
> +		return RX_HANDLER_CONSUMED;
> +	}
> +
> +	ep = &r->muxed_ep[mux_id];
> +
> +	if (r->ingress_data_format & RMNET_INGRESS_FORMAT_DEMUXING)
> +		skb->dev = ep->egress_dev;
> +
> +	/* Subtract MAP header */
> +	skb_pull(skb, sizeof(struct rmnet_map_header));
> +	skb_trim(skb, len);
> +	rmnet_set_skb_proto(skb);
> +	return rmnet_deliver_skb(skb, ep);
> +}
> +
> +static rx_handler_result_t
> +rmnet_map_ingress_handler(struct sk_buff *skb,
> +			  struct rmnet_real_dev_info *r)
> +{
> +	struct sk_buff *skbn;
> +	int rc;
> +
> +	if (r->ingress_data_format &
> RMNET_INGRESS_FORMAT_DEAGGREGATION) {
> +		while ((skbn = rmnet_map_deaggregate(skb, r)) !=
> NULL)
> +			__rmnet_map_ingress_handler(skbn, r);
> +
> +		consume_skb(skb);
> +		rc = RX_HANDLER_CONSUMED;
> +	} else {
> +		rc = __rmnet_map_ingress_handler(skb, r);
> +	}
> +
> +	return rc;
> +}
> +
> +static int rmnet_map_egress_handler(struct sk_buff *skb,
> +				    struct rmnet_real_dev_info *r,
> +				    struct rmnet_endpoint *ep,
> +				    struct net_device *orig_dev)
> +{
> +	int required_headroom, additional_header_len;
> +	struct rmnet_map_header *map_header;
> +
> +	additional_header_len = 0;
> +	required_headroom = sizeof(struct rmnet_map_header);
> +
> +	if (skb_headroom(skb) < required_headroom) {
> +		if (pskb_expand_head(skb, required_headroom, 0,
> GFP_KERNEL))
> +			return RMNET_MAP_CONSUMED;
> +	}
> +
> +	map_header = rmnet_map_add_map_header(skb,
> additional_header_len, 0);
> +	if (!map_header)
> +		return RMNET_MAP_CONSUMED;
> +
> +	if (r->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) {
> +		if (ep->mux_id == 0xff)
> +			map_header->mux_id = 0;
> +		else
> +			map_header->mux_id = ep->mux_id;
> +	}
> +
> +	skb->protocol = htons(ETH_P_MAP);
> +
> +	return RMNET_MAP_SUCCESS;
> +}
> +
> +/* Ingress / Egress Entry Points */
> +
> +/* Processes packet as per ingress data format for receiving device.
> Logical
> + * endpoint is determined from packet inspection. Packet is then
> sent to the
> + * egress device listed in the logical endpoint configuration.
> + */
> +rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
> +{
> +	struct rmnet_real_dev_info *r;
> +	struct sk_buff *skb = *pskb;
> +	struct net_device *dev;
> +	int rc;
> +
> +	if (!skb)
> +		return RX_HANDLER_CONSUMED;
> +
> +	dev = skb->dev;
> +	r = rmnet_get_real_dev_info(dev);
> +
> +	if (r->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) {
> +		rc = rmnet_map_ingress_handler(skb, r);
> +	} else {
> +		switch (ntohs(skb->protocol)) {
> +		case ETH_P_MAP:
> +			if (r->local_ep.rmnet_mode ==
> +				RMNET_EPMODE_BRIDGE) {
> +				rc =
> rmnet_ingress_deliver_packet(skb, r);
> +			} else {
> +				kfree_skb(skb);
> +				rc = RX_HANDLER_CONSUMED;
> +			}
> +			break;
> +
> +		case ETH_P_IP:
> +		case ETH_P_IPV6:
> +			rc = rmnet_ingress_deliver_packet(skb, r);
> +			break;
> +
> +		default:
> +			rc = RX_HANDLER_PASS;
> +		}
> +	}
> +
> +	return rc;
> +}
> +
> +/* Modifies packet as per logical endpoint configuration and egress
> data format
> + * for egress device configured in logical endpoint. Packet is then
> transmitted
> + * on the egress device.
> + */
> +void rmnet_egress_handler(struct sk_buff *skb,
> +			  struct rmnet_endpoint *ep)
> +{
> +	struct rmnet_real_dev_info *r;
> +	struct net_device *orig_dev;
> +
> +	orig_dev = skb->dev;
> +	skb->dev = ep->egress_dev;
> +
> +	r = rmnet_get_real_dev_info(skb->dev);
> +	if (!r) {
> +		kfree_skb(skb);
> +		return;
> +	}
> +
> +	if (r->egress_data_format & RMNET_EGRESS_FORMAT_MAP) {
> +		switch (rmnet_map_egress_handler(skb, r, ep,
> orig_dev)) {
> +		case RMNET_MAP_CONSUMED:
> +			return;
> +
> +		case RMNET_MAP_SUCCESS:
> +			break;
> +
> +		default:
> +			kfree_skb(skb);
> +			return;
> +		}
> +	}
> +
> +	if (ep->rmnet_mode == RMNET_EPMODE_VND)
> +		rmnet_vnd_tx_fixup(skb, orig_dev);
> +
> +	dev_queue_xmit(skb);
> +}
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
> new file mode 100644
> index 0000000..f2638cf
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
> @@ -0,0 +1,26 @@
> +/* Copyright (c) 2013, 2016-2017 The Linux Foundation. All rights
> reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * RMNET Data ingress/egress handler
> + *
> + */
> +
> +#ifndef _RMNET_HANDLERS_H_
> +#define _RMNET_HANDLERS_H_
> +
> +#include "rmnet_config.h"
> +
> +void rmnet_egress_handler(struct sk_buff *skb,
> +			  struct rmnet_endpoint *ep);
> +
> +rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb);
> +
> +#endif /* _RMNET_HANDLERS_H_ */
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
> new file mode 100644
> index 0000000..2aabad2
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
> @@ -0,0 +1,88 @@
> +/* Copyright (c) 2013-2017, The Linux Foundation. All rights
> reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef _RMNET_MAP_H_
> +#define _RMNET_MAP_H_
> +
> +struct rmnet_map_control_command {
> +	u8  command_name;
> +	u8  cmd_type:2;
> +	u8  reserved:6;
> +	u16 reserved2;
> +	u32 transaction_id;
> +	union {
> +		struct {
> +			u16 ip_family:2;
> +			u16 reserved:14;
> +			u16 flow_control_seq_num;
> +			u32 qos_id;
> +		} flow_control;
> +		u8 data[0];
> +	};
> +}  __aligned(1);
> +
> +enum rmnet_map_results {
> +	RMNET_MAP_SUCCESS,
> +	RMNET_MAP_CONSUMED,
> +	RMNET_MAP_GENERAL_FAILURE,
> +	RMNET_MAP_NOT_ENABLED,
> +	RMNET_MAP_FAILED_AGGREGATION,
> +	RMNET_MAP_FAILED_MUX
> +};
> +
> +enum rmnet_map_commands {
> +	RMNET_MAP_COMMAND_NONE,
> +	RMNET_MAP_COMMAND_FLOW_DISABLE,
> +	RMNET_MAP_COMMAND_FLOW_ENABLE,
> +	/* These should always be the last 2 elements */
> +	RMNET_MAP_COMMAND_UNKNOWN,
> +	RMNET_MAP_COMMAND_ENUM_LENGTH
> +};
> +
> +struct rmnet_map_header {
> +	u8  pad_len:6;
> +	u8  reserved_bit:1;
> +	u8  cd_bit:1;
> +	u8  mux_id;
> +	u16 pkt_len;
> +}  __aligned(1);
> +
> +#define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \
> +				 (Y)->data)->mux_id)
> +#define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \
> +				(Y)->data)->cd_bit)
> +#define RMNET_MAP_GET_PAD(Y) (((struct rmnet_map_header *) \
> +				(Y)->data)->pad_len)
> +#define RMNET_MAP_GET_CMD_START(Y) ((struct
> rmnet_map_control_command *) \
> +				    ((Y)->data + \
> +				      sizeof(struct
> rmnet_map_header)))
> +#define RMNET_MAP_GET_LENGTH(Y) (ntohs(((struct rmnet_map_header *)
> \
> +					(Y)->data)->pkt_len))
> +
> +#define RMNET_MAP_COMMAND_REQUEST     0
> +#define RMNET_MAP_COMMAND_ACK         1
> +#define RMNET_MAP_COMMAND_UNSUPPORTED 2
> +#define RMNET_MAP_COMMAND_INVALID     3
> +
> +#define RMNET_MAP_NO_PAD_BYTES        0
> +#define RMNET_MAP_ADD_PAD_BYTES       1
> +
> +u8 rmnet_map_demultiplex(struct sk_buff *skb);
> +struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
> +				      struct rmnet_real_dev_info
> *rdinfo);
> +
> +struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff
> *skb,
> +						  int hdrlen, int
> pad);
> +rx_handler_result_t rmnet_map_command(struct sk_buff *skb,
> +				      struct rmnet_real_dev_info
> *rdinfo);
> +
> +#endif /* _RMNET_MAP_H_ */
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
> new file mode 100644
> index 0000000..ccded40
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
> @@ -0,0 +1,107 @@
> +/* Copyright (c) 2013-2017, The Linux Foundation. All rights
> reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/netdevice.h>
> +#include "rmnet_config.h"
> +#include "rmnet_map.h"
> +#include "rmnet_private.h"
> +#include "rmnet_vnd.h"
> +
> +static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
> +				    struct rmnet_real_dev_info
> *rdinfo,
> +				    int enable)
> +{
> +	struct rmnet_map_control_command *cmd;
> +	struct rmnet_endpoint *ep;
> +	struct net_device *vnd;
> +	u16 ip_family;
> +	u16 fc_seq;
> +	u32 qos_id;
> +	u8 mux_id;
> +	int r;
> +
> +	mux_id = RMNET_MAP_GET_MUX_ID(skb);
> +	cmd = RMNET_MAP_GET_CMD_START(skb);
> +
> +	if (mux_id >= RMNET_MAX_LOGICAL_EP) {
> +		kfree_skb(skb);
> +		return RX_HANDLER_CONSUMED;
> +	}
> +
> +	ep = &rdinfo->muxed_ep[mux_id];
> +	vnd = ep->egress_dev;
> +
> +	ip_family = cmd->flow_control.ip_family;
> +	fc_seq = ntohs(cmd->flow_control.flow_control_seq_num);
> +	qos_id = ntohl(cmd->flow_control.qos_id);
> +
> +	/* Ignore the ip family and pass the sequence number for
> both v4 and v6
> +	 * sequence. User space does not support creating dedicated
> flows for
> +	 * the 2 protocols
> +	 */
> +	r = rmnet_vnd_do_flow_control(vnd, enable);
> +	if (r) {
> +		kfree_skb(skb);
> +		return RMNET_MAP_COMMAND_UNSUPPORTED;
> +	} else {
> +		return RMNET_MAP_COMMAND_ACK;
> +	}
> +}
> +
> +static void rmnet_map_send_ack(struct sk_buff *skb,
> +			       unsigned char type,
> +			       struct rmnet_real_dev_info *rdinfo)
> +{
> +	struct rmnet_map_control_command *cmd;
> +	int xmit_status;
> +
> +	skb->protocol = htons(ETH_P_MAP);
> +
> +	cmd = RMNET_MAP_GET_CMD_START(skb);
> +	cmd->cmd_type = type & 0x03;
> +
> +	netif_tx_lock(skb->dev);
> +	xmit_status = skb->dev->netdev_ops->ndo_start_xmit(skb, skb-
> >dev);
> +	netif_tx_unlock(skb->dev);
> +}
> +
> +/* Process MAP command frame and send N/ACK message as appropriate.
> Message cmd
> + * name is decoded here and appropriate handler is called.
> + */
> +rx_handler_result_t rmnet_map_command(struct sk_buff *skb,
> +				      struct rmnet_real_dev_info
> *rdinfo)
> +{
> +	struct rmnet_map_control_command *cmd;
> +	unsigned char command_name;
> +	unsigned char rc = 0;
> +
> +	cmd = RMNET_MAP_GET_CMD_START(skb);
> +	command_name = cmd->command_name;
> +
> +	switch (command_name) {
> +	case RMNET_MAP_COMMAND_FLOW_ENABLE:
> +		rc = rmnet_map_do_flow_control(skb, rdinfo, 1);
> +		break;
> +
> +	case RMNET_MAP_COMMAND_FLOW_DISABLE:
> +		rc = rmnet_map_do_flow_control(skb, rdinfo, 0);
> +		break;
> +
> +	default:
> +		rc = RMNET_MAP_COMMAND_UNSUPPORTED;
> +		kfree_skb(skb);
> +		break;
> +	}
> +	if (rc == RMNET_MAP_COMMAND_ACK)
> +		rmnet_map_send_ack(skb, rc, rdinfo);
> +	return RX_HANDLER_CONSUMED;
> +}
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
> new file mode 100644
> index 0000000..a29c476
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
> @@ -0,0 +1,105 @@
> +/* Copyright (c) 2013-2017, The Linux Foundation. All rights
> reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * RMNET Data MAP protocol
> + *
> + */
> +
> +#include <linux/netdevice.h>
> +#include "rmnet_config.h"
> +#include "rmnet_map.h"
> +#include "rmnet_private.h"
> +
> +#define RMNET_MAP_DEAGGR_SPACING  64
> +#define RMNET_MAP_DEAGGR_HEADROOM (RMNET_MAP_DEAGGR_SPACING / 2)
> +
> +/* Adds MAP header to front of skb->data
> + * Padding is calculated and set appropriately in MAP header. Mux ID
> is
> + * initialized to 0.
> + */
> +struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff
> *skb,
> +						  int hdrlen, int
> pad)
> +{
> +	struct rmnet_map_header *map_header;
> +	u32 padding, map_datalen;
> +	u8 *padbytes;
> +
> +	if (skb_headroom(skb) < sizeof(struct rmnet_map_header))
> +		return NULL;
> +
> +	map_datalen = skb->len - hdrlen;
> +	map_header = (struct rmnet_map_header *)
> +			skb_push(skb, sizeof(struct
> rmnet_map_header));
> +	memset(map_header, 0, sizeof(struct rmnet_map_header));
> +
> +	if (pad == RMNET_MAP_NO_PAD_BYTES) {
> +		map_header->pkt_len = htons(map_datalen);
> +		return map_header;
> +	}
> +
> +	padding = ALIGN(map_datalen, 4) - map_datalen;
> +
> +	if (padding == 0)
> +		goto done;
> +
> +	if (skb_tailroom(skb) < padding)
> +		return NULL;
> +
> +	padbytes = (u8 *)skb_put(skb, padding);
> +	memset(padbytes, 0, padding);
> +
> +done:
> +	map_header->pkt_len = htons(map_datalen + padding);
> +	map_header->pad_len = padding & 0x3F;
> +
> +	return map_header;
> +}
> +
> +/* Deaggregates a single packet
> + * A whole new buffer is allocated for each portion of an aggregated
> frame.
> + * Caller should keep calling deaggregate() on the source skb until
> 0 is
> + * returned, indicating that there are no more packets to
> deaggregate. Caller
> + * is responsible for freeing the original skb.
> + */
> +struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
> +				      struct rmnet_real_dev_info
> *rdinfo)
> +{
> +	struct rmnet_map_header *maph;
> +	struct sk_buff *skbn;
> +	u32 packet_len;
> +
> +	if (skb->len == 0)
> +		return NULL;
> +
> +	maph = (struct rmnet_map_header *)skb->data;
> +	packet_len = ntohs(maph->pkt_len) + sizeof(struct
> rmnet_map_header);
> +
> +	if (((int)skb->len - (int)packet_len) < 0)
> +		return NULL;
> +
> +	skbn = alloc_skb(packet_len + RMNET_MAP_DEAGGR_SPACING,
> GFP_ATOMIC);
> +	if (!skbn)
> +		return NULL;
> +
> +	skbn->dev = skb->dev;
> +	skb_reserve(skbn, RMNET_MAP_DEAGGR_HEADROOM);
> +	skb_put(skbn, packet_len);
> +	memcpy(skbn->data, skb->data, packet_len);
> +	skb_pull(skb, packet_len);
> +
> +	/* Some hardware can send us empty frames. Catch them */
> +	if (ntohs(maph->pkt_len) == 0) {
> +		kfree_skb(skb);
> +		return NULL;
> +	}
> +
> +	return skbn;
> +}
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
> new file mode 100644
> index 0000000..ed820b5
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
> @@ -0,0 +1,45 @@
> +/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All
> rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef _RMNET_PRIVATE_H_
> +#define _RMNET_PRIVATE_H_
> +
> +#define RMNET_MAX_VND              32
> +#define RMNET_MAX_PACKET_SIZE      16384
> +#define RMNET_DFLT_PACKET_SIZE     1500
> +#define RMNET_NEEDED_HEADROOM      16
> +#define RMNET_TX_QUEUE_LEN         1000
> +
> +/* Constants */
> +#define RMNET_EGRESS_FORMAT__RESERVED__         BIT(0)
> +#define RMNET_EGRESS_FORMAT_MAP                 BIT(1)
> +#define RMNET_EGRESS_FORMAT_AGGREGATION         BIT(2)
> +#define RMNET_EGRESS_FORMAT_MUXING              BIT(3)
> +#define RMNET_EGRESS_FORMAT_MAP_CKSUMV3         BIT(4)
> +#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4         BIT(5)
> +
> +#define RMNET_INGRESS_FIX_ETHERNET              BIT(0)
> +#define RMNET_INGRESS_FORMAT_MAP                BIT(1)
> +#define RMNET_INGRESS_FORMAT_DEAGGREGATION      BIT(2)
> +#define RMNET_INGRESS_FORMAT_DEMUXING           BIT(3)
> +#define RMNET_INGRESS_FORMAT_MAP_COMMANDS       BIT(4)
> +#define RMNET_INGRESS_FORMAT_MAP_CKSUMV3        BIT(5)
> +#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4        BIT(6)
> +
> +/* Pass the frame up the stack with no modifications to skb->dev */
> +#define RMNET_EPMODE_NONE (0)
> +/* Replace skb->dev to a virtual rmnet device and pass up the stack
> */
> +#define RMNET_EPMODE_VND (1)
> +/* Pass the frame directly to another device with dev_queue_xmit()
> */
> +#define RMNET_EPMODE_BRIDGE (2)
> +
> +#endif /* _RMNET_PRIVATE_H_ */
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
> new file mode 100644
> index 0000000..c8b573d
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
> @@ -0,0 +1,170 @@
> +/* Copyright (c) 2013-2017, The Linux Foundation. All rights
> reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + *
> + * RMNET Data virtual network driver
> + *
> + */
> +
> +#include <linux/etherdevice.h>
> +#include <linux/if_arp.h>
> +#include <net/pkt_sched.h>
> +#include "rmnet_config.h"
> +#include "rmnet_handlers.h"
> +#include "rmnet_private.h"
> +#include "rmnet_map.h"
> +#include "rmnet_vnd.h"
> +
> +/* RX/TX Fixup */
> +
> +void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev)
> +{
> +	dev->stats.rx_packets++;
> +	dev->stats.rx_bytes += skb->len;
> +}
> +
> +void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)
> +{
> +	dev->stats.tx_packets++;
> +	dev->stats.tx_bytes += skb->len;
> +}
> +
> +/* Network Device Operations */
> +
> +static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb,
> +					struct net_device *dev)
> +{
> +	struct rmnet_priv *priv;
> +
> +	priv = netdev_priv(dev);
> +	if (priv->local_ep.egress_dev) {
> +		rmnet_egress_handler(skb, &priv->local_ep);
> +	} else {
> +		dev->stats.tx_dropped++;
> +		kfree_skb(skb);
> +	}
> +	return NETDEV_TX_OK;
> +}
> +
> +static int rmnet_vnd_change_mtu(struct net_device *rmnet_dev, int
> new_mtu)
> +{
> +	if (new_mtu < 0 || new_mtu > RMNET_MAX_PACKET_SIZE)
> +		return -EINVAL;
> +
> +	rmnet_dev->mtu = new_mtu;
> +	return 0;
> +}
> +
> +static const struct net_device_ops rmnet_vnd_ops = {
> +	.ndo_start_xmit = rmnet_vnd_start_xmit,
> +	.ndo_change_mtu = rmnet_vnd_change_mtu,
> +};

Please implement ndo_get_iflink as well, so that it's easy to find out
what the "parent"/lowerdev for a given rmnet interface is.

That might mean adding a "phy_dev" member to rmnet_priv, but that might
help you clean up a lot of other stuff too.

> +/* Called by kernel whenever a new rmnet<n> device is created. Sets
> MTU,
> + * flags, ARP type, needed headroom, etc...
> + */
> +void rmnet_vnd_setup(struct net_device *rmnet_dev)
> +{
> +	struct rmnet_priv *priv;
> +
> +	priv = netdev_priv(rmnet_dev);
> +	netdev_dbg(rmnet_dev, "Setting up device %s\n", rmnet_dev-
> >name);
> +
> +	rmnet_dev->netdev_ops = &rmnet_vnd_ops;
> +	rmnet_dev->mtu = RMNET_DFLT_PACKET_SIZE;
> +	rmnet_dev->needed_headroom = RMNET_NEEDED_HEADROOM;
> +	random_ether_addr(rmnet_dev->dev_addr);
> +	rmnet_dev->tx_queue_len = RMNET_TX_QUEUE_LEN;
> +
> +	/* Raw IP mode */
> +	rmnet_dev->header_ops = NULL;  /* No header */
> +	rmnet_dev->type = ARPHRD_RAWIP;
> +	rmnet_dev->hard_header_len = 0;
> +	rmnet_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
> +
> +	rmnet_dev->needs_free_netdev = true;
> +}
> +
> +/* Exposed API */
> +
> +int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
> +		      struct rmnet_real_dev_info *r)
> +{
> +	int rc;
> +
> +	if (r->rmnet_devices[id])
> +		return -EINVAL;
> +
> +	rc = register_netdevice(rmnet_dev);
> +	if (!rc) {
> +		r->rmnet_devices[id] = rmnet_dev;
> +		r->nr_rmnet_devs++;
> +		rmnet_dev->rtnl_link_ops = &rmnet_link_ops;
> +	}
> +
> +	return rc;
> +}
> +
> +int rmnet_vnd_dellink(u8 id, struct rmnet_real_dev_info *r)
> +{
> +	if (id >= RMNET_MAX_VND || !r->rmnet_devices[id])
> +		return -EINVAL;
> +
> +	r->rmnet_devices[id] = NULL;
> +	r->nr_rmnet_devs--;
> +	return 0;
> +}
> +
> +u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev)
> +{
> +	struct rmnet_priv *priv;
> +
> +	priv = netdev_priv(rmnet_dev);
> +	return priv->mux_id;
> +}
> +
> +void rmnet_vnd_set_mux(struct net_device *rmnet_dev, u8 mux_id)
> +{
> +	struct rmnet_priv *priv;
> +
> +	priv = netdev_priv(rmnet_dev);
> +	priv->mux_id = mux_id;
> +}
> +
> +/* Gets the logical endpoint configuration for a RmNet virtual
> network device
> + * node. Caller should confirm that devices is a RmNet VND before
> calling.
> + */
> +struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device
> *rmnet_dev)
> +{
> +	struct rmnet_priv *priv;
> +
> +	if (!rmnet_dev)
> +		return NULL;
> +
> +	priv = netdev_priv(rmnet_dev);
> +
> +	return &priv->local_ep;
> +}
> +
> +int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int
> enable)
> +{
> +	netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n",
> enable);
> +	/* Although we expect similar number of enable/disable
> +	 * commands, optimize for the disable. That is more
> +	 * latency sensitive than enable
> +	 */
> +	if (unlikely(enable))
> +		netif_wake_queue(rmnet_dev);
> +	else
> +		netif_stop_queue(rmnet_dev);
> +
> +	return 0;
> +}
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
> b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
> new file mode 100644
> index 0000000..b102b42
> --- /dev/null
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
> @@ -0,0 +1,29 @@
> +/* Copyright (c) 2013-2017, The Linux Foundation. All rights
> reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of the GNU General Public License version 2
> and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * RMNET Data Virtual Network Device APIs
> + *
> + */
> +
> +#ifndef _RMNET_VND_H_
> +#define _RMNET_VND_H_
> +
> +int rmnet_vnd_do_flow_control(struct net_device *dev, int enable);
> +struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device
> *dev);
> +int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
> +		      struct rmnet_real_dev_info *r);
> +int rmnet_vnd_dellink(u8 id, struct rmnet_real_dev_info *r);
> +void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device
> *dev);
> +void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device
> *dev);
> +u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev);
> +void rmnet_vnd_set_mux(struct net_device *rmnet_dev, u8 mux_id);
> +void rmnet_vnd_setup(struct net_device *dev);
> +#endif /* _RMNET_VND_H_ */

^ permalink raw reply

* Re: [GIT] Networking
From: Kalle Valo @ 2017-08-30 14:45 UTC (permalink / raw)
  To: Pavel Machek
  Cc: David Miller, xiyou.wangcong, torvalds, akpm, netdev,
	linux-kernel
In-Reply-To: <20170830094823.GA31503@amd>

Pavel Machek <pavel@ucw.cz> writes:

> Could we get this one in?
>
> wl1251 misses a spin_lock_init().
>
> https://www.mail-archive.com/netdev@vger.kernel.org/msg177031.html
>
> It seems pretty trivial, yet getting the backtraces is not nice.

It's in wireless-drivers-next and will be in 4.14-rc1:

https://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git/commit/?id=6e9aae179f290f1a44fce7ef8e9a8e2dd68ed1e4

-- 
Kalle Valo

^ permalink raw reply

* Re: [PATCH v2 net-next 0/6] flow_dissector: Protocol specific flow dissector offload
From: Tom Herbert @ 2017-08-30 14:50 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: David S . Miller, Linux Kernel Network Developers
In-Reply-To: <874lsp8n2x.fsf@stressinduktion.org>

On Wed, Aug 30, 2017 at 1:41 AM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> Hello Tom,
>
> Tom Herbert <tom@quantonium.net> writes:
>
>> This patch set adds a new offload type to perform flow dissection for
>> specific protocols (either by EtherType or by IP protocol). This is
>> primarily useful to crack open UDP encapsulations (like VXLAN, GUE) for
>> the purposes of parsing the encapsulated packet.
>>
>> Items in this patch set:
>> - Constify skb argument to UDP lookup functions
>> - Create new protocol case in __skb_dissect for ETH_P_TEB. This is based
>>   on the code in the GRE dissect function and the special handling in
>>   GRE can now be removed (it sets protocol to ETH_P_TEB and returns so
>>   goto proto_again is done)
>> - Add infrastructure for protocol specific flow dissection offload
>> - Add infrastructure to perform UDP flow dissection. Uses same model of
>>   GRO where a flow_dissect callback can be associated with a UDP
>>   socket
>> - Use the infrastructure to support flow dissection of VXLAN and GUE
>>
>> Tested:
>>
>> Forced RPS to call flow dissection for VXLAN, FOU, and GUE. Observed
>> that inner packet was being properly dissected.
>>
>> v2: Add signed off
>
> [...]
>
> Can you provide more context on why you did this series? Is the entropy
> insufficient you receive via UDP source ports? I assume this is the case
> for HW RSS hashing but actually not for the software dissector.
>
Hi Hannes,

I think entropy is sufficient looking at UDP source ports, but there
is not universal agreement on that. In any case, there are now many
other uses of the flow dissector; for those that want DPI, like getting
TCP flags, UDP encapsulation is currently a blind spot.

> Btw. we forbid hardware to use L4 information if IP_PROTO is UDP but we
> allow it in RPS (not in IPv6 if flowlabel is present). Your series could
> solve this problem by being more protocol specific and disallowing
> fragmentation on a particular quadtuple, much like hw
> encap offload, where we tell the specific port number to the hardware
> and then disallow using L4 information for all other UDP protocols.
>
IMO the fact that HW is protocol specific and operates solely on ports
is a problem (remember Less Is More...). It's better to be protocol
generic and do the socket lookup in SW which no longer has atomic
operations. Matching by bound socket tuple is more accurate than just
a port. However, technically this solution still isn't 100% correct
since it's possible that macvlan or ipvlan may intercede and steer
packet to a namespace where the socket isn't valid.

Tom

^ permalink raw reply

* Re: [PATCH v2 net-next 4/6] udp: flow dissector offload
From: Tom Herbert @ 2017-08-30 14:56 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: David S . Miller, Linux Kernel Network Developers
In-Reply-To: <1504089372.2480.55.camel@redhat.com>

On Wed, Aug 30, 2017 at 3:36 AM, Paolo Abeni <pabeni@redhat.com> wrote:
> On Tue, 2017-08-29 at 16:27 -0700, Tom Herbert wrote:
>> Add support to perform UDP specific flow dissection. This is
>> primarily intended for dissecting encapsulated packets in UDP
>> encapsulation.
>>
>> This patch adds a flow_dissect offload for UDP4 and UDP6. The backend
>> function performs a socket lookup and calls the flow_dissect function
>> if a socket is found.
>>
>> Signed-off-by: Tom Herbert <tom@quantonium.net>
>> ---
>>  include/linux/udp.h      |  8 ++++++++
>>  include/net/udp.h        |  8 ++++++++
>>  include/net/udp_tunnel.h |  8 ++++++++
>>  net/ipv4/udp_offload.c   | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>  net/ipv4/udp_tunnel.c    |  1 +
>>  net/ipv6/udp_offload.c   | 13 +++++++++++++
>>  6 files changed, 83 insertions(+)
>>
>> diff --git a/include/linux/udp.h b/include/linux/udp.h
>> index eaea63bc79bb..2e90b189ef6a 100644
>> --- a/include/linux/udp.h
>> +++ b/include/linux/udp.h
>> @@ -79,6 +79,14 @@ struct udp_sock {
>>       int                     (*gro_complete)(struct sock *sk,
>>                                               struct sk_buff *skb,
>>                                               int nhoff);
>> +     /* Flow dissector function for a UDP socket */
>> +     enum flow_dissect_ret (*flow_dissect)(struct sock *sk,
>> +                     const struct sk_buff *skb,
>> +                     struct flow_dissector_key_control *key_control,
>> +                     struct flow_dissector *flow_dissector,
>> +                     void *target_container, void *data,
>> +                     __be16 *p_proto, u8 *p_ip_proto, int *p_nhoff,
>> +                     int *p_hlen, unsigned int flags);
>>
>>       /* udp_recvmsg try to use this before splicing sk_receive_queue */
>>       struct sk_buff_head     reader_queue ____cacheline_aligned_in_smp;
>> diff --git a/include/net/udp.h b/include/net/udp.h
>> index f3d1de6f0983..499e4faf8b14 100644
>> --- a/include/net/udp.h
>> +++ b/include/net/udp.h
>> @@ -174,6 +174,14 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
>>                                struct udphdr *uh, udp_lookup_t lookup);
>>  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
>>
>> +enum flow_dissect_ret udp_flow_dissect(const struct sk_buff *skb,
>> +                     udp_lookup_t lookup,
>> +                     struct flow_dissector_key_control *key_control,
>> +                     struct flow_dissector *flow_dissector,
>> +                     void *target_container, void *data,
>> +                     __be16 *p_proto, u8 *p_ip_proto, int *p_nhoff,
>> +                     int *p_hlen, unsigned int flags);
>> +
>>  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
>>  {
>>       struct udphdr *uh;
>> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
>> index 10cce0dd4450..b7102e0f41a9 100644
>> --- a/include/net/udp_tunnel.h
>> +++ b/include/net/udp_tunnel.h
>> @@ -69,6 +69,13 @@ typedef struct sk_buff **(*udp_tunnel_gro_receive_t)(struct sock *sk,
>>                                                    struct sk_buff *skb);
>>  typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb,
>>                                        int nhoff);
>> +typedef enum flow_dissect_ret (*udp_tunnel_flow_dissect_t)(struct sock *sk,
>> +                     const struct sk_buff *skb,
>> +                     struct flow_dissector_key_control *key_control,
>> +                     struct flow_dissector *flow_dissector,
>> +                     void *target_container, void *data,
>> +                     __be16 *p_proto, u8 *p_ip_proto, int *p_nhoff,
>> +                     int *p_hlen, unsigned int flags);
>>
>>  struct udp_tunnel_sock_cfg {
>>       void *sk_user_data;     /* user data used by encap_rcv call back */
>> @@ -78,6 +85,7 @@ struct udp_tunnel_sock_cfg {
>>       udp_tunnel_encap_destroy_t encap_destroy;
>>       udp_tunnel_gro_receive_t gro_receive;
>>       udp_tunnel_gro_complete_t gro_complete;
>> +     udp_tunnel_flow_dissect_t flow_dissect;
>>  };
>>
>>  /* Setup the given (UDP) sock to receive UDP encapsulated packets */
>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
>> index 97658bfc1b58..7f0a7ed4a6f7 100644
>> --- a/net/ipv4/udp_offload.c
>> +++ b/net/ipv4/udp_offload.c
>> @@ -328,11 +328,56 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
>>       return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
>>  }
>>
>> +enum flow_dissect_ret udp_flow_dissect(const struct sk_buff *skb,
>> +                     udp_lookup_t lookup,
>> +                     struct flow_dissector_key_control *key_control,
>> +                     struct flow_dissector *flow_dissector,
>> +                     void *target_container, void *data,
>> +                     __be16 *p_proto, u8 *p_ip_proto, int *p_nhoff,
>> +                     int *p_hlen, unsigned int flags)
>> +{
>> +     enum flow_dissect_ret ret = FLOW_DISSECT_RET_CONTINUE;
>> +     struct udphdr *uh, _uh;
>> +     struct sock *sk;
>> +
>> +     uh = __skb_header_pointer(skb, *p_nhoff, sizeof(_uh), data,
>> +                               *p_hlen, &_uh);
>> +     if (!uh)
>> +             return FLOW_DISSECT_RET_OUT_BAD;
>> +
>> +     rcu_read_lock();
>> +
>> +     sk = (*lookup)(skb, uh->source, uh->dest);
>> +
>> +     if (sk && udp_sk(sk)->flow_dissect)
>> +             ret = udp_sk(sk)->flow_dissect(sk, skb, key_control,
>> +                                            flow_dissector, target_container,
>> +                                            data, p_proto, p_ip_proto,
>> +                                            p_nhoff, p_hlen, flags);
>> +     rcu_read_unlock();
>> +
>> +     return ret;
>> +}
>> +EXPORT_SYMBOL(udp_flow_dissect);
>
> If I read the above correctly, this is going to add another full UDP
> lookup per UDP packet, can we avoid it with some static key enabled by
> vxlan/fou/etc. ?
>
That's a good idea! Should just check udp_encap_needed. Also makes
sense to have in udp_gro_receive.

Tom

> Thanks,
>
> Paolo

^ permalink raw reply

* Re: [PATCH v2 2/3] dt-binding: net: sfp binding documentation
From: Andrew Lunn @ 2017-08-30 14:58 UTC (permalink / raw)
  To: Baruch Siach
  Cc: Sergei Shtylyov, Rob Herring, Mark Rutland, Florian Fainelli,
	David S. Miller, Russell King, netdev, devicetree
In-Reply-To: <20170830112557.dhcl3pv7gmuzg22v@tarshish>

> > > >     Your example shows there's GPIO phandle *and* specifier.
> > > 
> > > Would "GPIO specifier" be enough here?
> > 
> >    No, specifier is the cells following GPIO (or any other) phandle.
> 
> So this should be "GPIO phandle and specifier of ...", is that correct?
> 
> I have found very few (< 4) occurrences of this language in (lots of) '-gpios' 
> property descriptions under Documentation/devicetree/bindings/. Is this a new 
> requirement?

Sometimes it is just easier to refer to another document:

GPIO, as defined in Documentation/devicetree/bindings/gpio/gpio.txt

      Andrew

^ permalink raw reply

* Re: [ovs-dev] [PATCH net-next v6 3/3] openvswitch: enable NSH support
From: Hannes Frederic Sowa @ 2017-08-30 15:15 UTC (permalink / raw)
  To: Mooney, Sean K
  Cc: Yang, Yi Y, dev@openvswitch.org, netdev@vger.kernel.org,
	jbenc@redhat.com, e@erig.me
In-Reply-To: <4B1BB321037C0849AAE171801564DFA6888FAED3@IRSMSX107.ger.corp.intel.com>

"Mooney, Sean K" <sean.k.mooney@intel.com> writes:

>> -----Original Message-----
>> From: ovs-dev-bounces@openvswitch.org [mailto:ovs-dev-
>> bounces@openvswitch.org] On Behalf Of Hannes Frederic Sowa
>> Sent: Wednesday, August 30, 2017 10:53 AM
>> To: Yang, Yi Y <yi.y.yang@intel.com>
>> Cc: dev@openvswitch.org; netdev@vger.kernel.org; jbenc@redhat.com;
>> e@erig.me
>> Subject: Re: [ovs-dev] [PATCH net-next v6 3/3] openvswitch: enable NSH
>> support
>> 
>> Hello,
>> 
>> Yi Yang <yi.y.yang@intel.com> writes:
>> 
>> [...]
>> 
>> > +struct ovs_key_nsh {
>> > +	u8 flags;
>> > +	u8 ttl;
>> > +	u8 mdtype;
>> > +	u8 np;
>> > +	__be32 path_hdr;
>> > +	__be32 context[NSH_MD1_CONTEXT_SIZE]; };
>> > +
>> >  struct sw_flow_key {
>> >  	u8 tun_opts[IP_TUNNEL_OPTS_MAX];
>> >  	u8 tun_opts_len;
>> > @@ -144,6 +154,7 @@ struct sw_flow_key {
>> >  			};
>> >  		} ipv6;
>> >  	};
>> > +	struct ovs_key_nsh nsh;         /* network service header */
>> >  	struct {
>> >  		/* Connection tracking fields not packed above. */
>> >  		struct {
>> 
>> Does it make sense to keep the context headers as part of the flow?
>> What is the reasoning behind it? With mdtype 2 headers this might
>> either not work very well or will increase sw_flow_key size causing
>> slowdowns for all protocols.
> [Mooney, Sean K]
> Having the nsh context headers in the flow is quite useful. It would
> allow load balancing on values stored in the context headers, or other
> uses. I believe odl previously used context header 4 to store a flow id,
> so this could potentially be used with the multipath action to have ovs
> choose between several possible next hops in the chain.

In OVS, masks are a list(!) for matching. How can this work for
different paths that might require different masks? If they can't be
unified you even get exact matches. Thus, for OVS the context should not
be part of the flow.

> Another example of where this is useful is branching chains. If I
> assume that both the classifier and service function forwarder are
> collocated in ovs on the host, and I send a packet to a firewall
> service function which tags the packet as suspicious via setting a
> context header metadata field to 1, I as the sdn controller can
> install a high priority rule that will reclassify the packet as part
> of a separate service function chain that will perform dpi on the
> packet before returning it to the original chain if deemed not a
> threat.

You can do that with different path id's, too?

> So while an sff does not in general have to be able to match on the
> context header, if I assume I want to use ovs to implement a classifier
> or service function (e.g. load balancer), then it's desirable to be able
> to both match on the context headers in md type 1 and also be able to set
> them (this is something classifiers and service functions are allowed to
> do).

I don't think it is practical at all?

^ permalink raw reply

* Re: [net-next PATCHv6 1/2] dt-bindings: net: Add DT bindings for Socionext Netsec
From: Andrew Lunn @ 2017-08-30 15:19 UTC (permalink / raw)
  To: Jassi Brar
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, mark.rutland-5wv7dgnIgG8,
	arnd-r2nGTMty4D4, patches-QSEj5FYQhm4dnm+yROfE0A, Jassi Brar,
	robh+dt-DgEjT+Ai2ygdnm+yROfE0A, andy-/Zus8d0mwwtBDgjK7y7TUQ
In-Reply-To: <1504088752-6204-1-git-send-email-jaswinder.singh-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>

On Wed, Aug 30, 2017 at 03:55:52PM +0530, Jassi Brar wrote:
> This patch adds documentation for Device-Tree bindings for the
> Socionext NetSec Controller driver.
> 
> Signed-off-by: Jassi Brar <jaswinder.singh-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
> ---
>  .../devicetree/bindings/net/socionext-netsec.txt   | 46 ++++++++++++++++++++++
>  1 file changed, 46 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/net/socionext-netsec.txt
> 
> diff --git a/Documentation/devicetree/bindings/net/socionext-netsec.txt b/Documentation/devicetree/bindings/net/socionext-netsec.txt
> new file mode 100644
> index 0000000..12d596c
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/socionext-netsec.txt
> @@ -0,0 +1,46 @@
> +* Socionext NetSec Ethernet Controller IP
> +
> +Required properties:
> +- compatible: Should be "socionext,netsecv5"
> +- reg: Address and length of the register sets, the first is the main
> +	registers, then the rdlar and tdlar regions for the SoC
> +- interrupts: Should contain ethernet controller interrupt
> +- clocks: phandle to any clocks to be switched by runtime_pm
> +- phy-mode: See ethernet.txt file in the same directory

> +- max-speed: See ethernet.txt file in the same directory
> +- max-frame-size: See ethernet.txt file in the same directory, if 9000 or
> +	above jumbo frames are enabled
> +- local-mac-address: See ethernet.txt file in the same directory

These three are required, not optional?

> +- phy-handle: phandle to select child phy
> +
> +Optional properties:
> +- use-jumbo: Boolean property to suggest if jumbo packets should be used or not
> +
> +For the child phy
> +
> +- compatible "ethernet-phy-ieee802.3-c22" is needed

This is normally considered optional. Why require it?

     Andrew
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [iproute PATCH] ss: Fix for added diag support check
From: Stephen Hemminger @ 2017-08-30 15:20 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netdev
In-Reply-To: <20170828173122.11458-1-phil@nwl.cc>

On Mon, 28 Aug 2017 19:31:22 +0200
Phil Sutter <phil@nwl.cc> wrote:

> Commit 9f66764e308e9 ("libnetlink: Add test for error code returned from
> netlink reply") changed rtnl_dump_filter_l() to return an error in case
> NLMSG_DONE would contain one, even if it was ENOENT.
> 
> This in turn breaks ss when it tries to dump DCCP sockets on a system
> without support for it: The function tcp_show(), which is shared between
> TCP and DCCP, will start parsing /proc since inet_show_netlink() returns
> an error - yet it parses /proc/net/tcp which doesn't make sense for DCCP
> sockets at all.
> 
> On my system, a call to 'ss' without further arguments prints the list
> of connected TCP sockets twice.
> 
> Fix this by introducing a dedicated function dccp_show() which does not
> have a fallback to /proc, just like sctp_show(). And since tcp_show()
> is no longer "multi-purpose", drop its socktype parameter.
> 
> Fixes: 9f66764e308e9 ("libnetlink: Add test for error code returned from netlink reply")
> Signed-off-by: Phil Sutter <phil@nwl.cc>

Applied

^ permalink raw reply

* Re: [PATCH net-next 0/3] tc: act_ife: handle IEEE IFE ethertype as default
From: Stephen Hemminger @ 2017-08-30 15:27 UTC (permalink / raw)
  To: Alexander Aring
  Cc: jhs, yotamg, xiyou.wangcong, jiri, lucasb, netdev,
	linux-kselftest
In-Reply-To: <20170828190315.26646-1-aring@mojatatu.com>

On Mon, 28 Aug 2017 15:03:12 -0400
Alexander Aring <aring@mojatatu.com> wrote:

> Hi,
> 
> this patch series will introduce the IFE ethertype which is registered by
> IEEE. If the netlink act_ife type netlink attribute is not given it will
> use this value by default now.
> Finally, it will introduce some UAPI testcases to check if the default type
> is used if not specified and vice versa.
> 
> - Alex
> 
> Alexander Aring (3):
>   if_ether: add forces ife lfb type
>   act_ife: use registered ife_type as fallback
>   tc-testing: add test for testing ife type
> 
>  include/uapi/linux/if_ether.h                      |  1 +
>  net/sched/act_ife.c                                | 17 ++------
>  .../tc-testing/tc-tests/actions/tests.json         | 50 ++++++++++++++++++++++
>  3 files changed, 54 insertions(+), 14 deletions(-)
> 

Applied to net-next

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox