Netdev List
 help / color / mirror / Atom feed
* [PATCH 02/31] netfilter: nf_tables: split set destruction in deactivate and destroy phase
From: Pablo Neira Ayuso @ 2018-10-08 23:00 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20181008230125.2330-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

Splits unbind_set into destroy_set and unbinding operation.

Unbinding removes set from lists (so new transaction would not
find it anymore) but keeps memory allocated (so packet path continues
to work).

Rebind function is added to allow unrolling in case transaction
that wants to remove set is aborted.

Destroy function is added to free the memory, but this could occur
outside of transaction in the future.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  7 ++++++-
 net/netfilter/nf_tables_api.c     | 36 +++++++++++++++++++++++++-----------
 net/netfilter/nft_dynset.c        | 21 ++++++++++++++++++++-
 net/netfilter/nft_lookup.c        | 20 +++++++++++++++++++-
 net/netfilter/nft_objref.c        | 20 +++++++++++++++++++-
 5 files changed, 89 insertions(+), 15 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 0f39ac487012..2c33958f3e7a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -470,6 +470,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		       struct nft_set_binding *binding);
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 			  struct nft_set_binding *binding);
+void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
+			  struct nft_set_binding *binding);
+void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
 
 /**
  *	enum nft_set_extensions - set extension type IDs
@@ -724,7 +727,9 @@ struct nft_expr_type {
  *	@eval: Expression evaluation function
  *	@size: full expression size, including private data size
  *	@init: initialization function
- *	@destroy: destruction function
+ *	@activate: activate expression in the next generation
+ *	@deactivate: deactivate expression in next generation
+ *	@destroy: destruction function, called after synchronize_rcu
  *	@dump: function to dump parameters
  *	@type: expression type
  *	@validate: validate expression, called during loop detection
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2cfb173cd0b2..220e6aab3fac 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -298,7 +298,7 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
 	return 0;
 }
 
-static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
 			     struct nft_set *set)
 {
 	struct nft_trans *trans;
@@ -318,7 +318,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
 	return 0;
 }
 
-static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
+static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
 {
 	int err;
 
@@ -3567,13 +3567,6 @@ static void nft_set_destroy(struct nft_set *set)
 	kvfree(set);
 }
 
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
-{
-	list_del_rcu(&set->list);
-	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
-	nft_set_destroy(set);
-}
-
 static int nf_tables_delset(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[],
@@ -3668,17 +3661,38 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 }
 EXPORT_SYMBOL_GPL(nf_tables_bind_set);
 
-void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
 			  struct nft_set_binding *binding)
 {
+	if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
+	    nft_is_active(ctx->net, set))
+		list_add_tail_rcu(&set->list, &ctx->table->sets);
+
+	list_add_tail_rcu(&binding->list, &set->bindings);
+}
+EXPORT_SYMBOL_GPL(nf_tables_rebind_set);
+
+void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+		          struct nft_set_binding *binding)
+{
 	list_del_rcu(&binding->list);
 
 	if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
 	    nft_is_active(ctx->net, set))
-		nf_tables_set_destroy(ctx, set);
+		list_del_rcu(&set->list);
 }
 EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
 
+void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
+{
+	if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
+	    nft_is_active(ctx->net, set)) {
+		nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
+		nft_set_destroy(set);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
+
 const struct nft_set_ext_type nft_set_ext_types[] = {
 	[NFT_SET_EXT_KEY]		= {
 		.align	= __alignof__(u32),
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 6e91a37d57f2..07d4efd3d851 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -235,14 +235,31 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 	return err;
 }
 
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
+
+	nf_tables_rebind_set(ctx, priv->set, &priv->binding);
+}
+
+static void nft_dynset_deactivate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr)
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
+
+	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
 static void nft_dynset_destroy(const struct nft_ctx *ctx,
 			       const struct nft_expr *expr)
 {
 	struct nft_dynset *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
 	if (priv->expr != NULL)
 		nft_expr_destroy(ctx, priv->expr);
+
+	nf_tables_destroy_set(ctx, priv->set);
 }
 
 static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -279,6 +296,8 @@ static const struct nft_expr_ops nft_dynset_ops = {
 	.eval		= nft_dynset_eval,
 	.init		= nft_dynset_init,
 	.destroy	= nft_dynset_destroy,
+	.activate	= nft_dynset_activate,
+	.deactivate	= nft_dynset_deactivate,
 	.dump		= nft_dynset_dump,
 };
 
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index ad13e8643599..227b2b15a19c 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -121,12 +121,28 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
 	return 0;
 }
 
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
+
+	nf_tables_rebind_set(ctx, priv->set, &priv->binding);
+}
+
+static void nft_lookup_deactivate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr)
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
+
+	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
 static void nft_lookup_destroy(const struct nft_ctx *ctx,
 			       const struct nft_expr *expr)
 {
 	struct nft_lookup *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+	nf_tables_destroy_set(ctx, priv->set);
 }
 
 static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -209,6 +225,8 @@ static const struct nft_expr_ops nft_lookup_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
 	.eval		= nft_lookup_eval,
 	.init		= nft_lookup_init,
+	.activate	= nft_lookup_activate,
+	.deactivate	= nft_lookup_deactivate,
 	.destroy	= nft_lookup_destroy,
 	.dump		= nft_lookup_dump,
 	.validate	= nft_lookup_validate,
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index cdf348f751ec..a3185ca2a3a9 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -155,12 +155,28 @@ static int nft_objref_map_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	return -1;
 }
 
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr)
+{
+	struct nft_objref_map *priv = nft_expr_priv(expr);
+
+	nf_tables_rebind_set(ctx, priv->set, &priv->binding);
+}
+
+static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
+				      const struct nft_expr *expr)
+{
+	struct nft_objref_map *priv = nft_expr_priv(expr);
+
+	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
 static void nft_objref_map_destroy(const struct nft_ctx *ctx,
 				   const struct nft_expr *expr)
 {
 	struct nft_objref_map *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+	nf_tables_destroy_set(ctx, priv->set);
 }
 
 static struct nft_expr_type nft_objref_type;
@@ -169,6 +185,8 @@ static const struct nft_expr_ops nft_objref_map_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
 	.eval		= nft_objref_map_eval,
 	.init		= nft_objref_map_init,
+	.activate	= nft_objref_map_activate,
+	.deactivate	= nft_objref_map_deactivate,
 	.destroy	= nft_objref_map_destroy,
 	.dump		= nft_objref_map_dump,
 };
-- 
2.11.0

^ permalink raw reply related

* [PATCH 03/31] netfilter: nf_tables: warn when expr implements only one of activate/deactivate
From: Pablo Neira Ayuso @ 2018-10-08 23:00 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20181008230125.2330-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

->destroy is only allowed to free data, or do other cleanups that do not
have side effects on other state, such as visibility to other netlink
requests.

Such things need to be done in ->deactivate.
As a transaction can fail, we need to make sure we can undo such
operations, therefore ->activate() has to be provided too.

So print a warning and refuse registration if expr->ops provides
only one of the two operations.

v2: fix nft_expr_check_ops to not repeat same check twice (Jones Desougi)

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 220e6aab3fac..d4c531e0a26f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -207,6 +207,18 @@ static int nft_delchain(struct nft_ctx *ctx)
 	return err;
 }
 
+/* either expr ops provide both activate/deactivate, or neither */
+static bool nft_expr_check_ops(const struct nft_expr_ops *ops)
+{
+	if (!ops)
+		return true;
+
+	if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate)))
+		return false;
+
+	return true;
+}
+
 static void nft_rule_expr_activate(const struct nft_ctx *ctx,
 				   struct nft_rule *rule)
 {
@@ -1907,6 +1919,9 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
  */
 int nft_register_expr(struct nft_expr_type *type)
 {
+	if (!nft_expr_check_ops(type->ops))
+		return -EINVAL;
+
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
 	if (type->family == NFPROTO_UNSPEC)
 		list_add_tail_rcu(&type->list, &nf_tables_expressions);
@@ -2054,6 +2069,10 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
 			err = PTR_ERR(ops);
 			goto err1;
 		}
+		if (!nft_expr_check_ops(ops)) {
+			err = -EINVAL;
+			goto err1;
+		}
 	} else
 		ops = type->ops;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 01/31] netfilter: nf_tables: rt: allow checking if dst has xfrm attached
From: Pablo Neira Ayuso @ 2018-10-08 23:00 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20181008230125.2330-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

Useful e.g. to avoid NATting inner headers of to-be-encrypted packets.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nft_rt.c                   | 11 +++++++++++
 2 files changed, 13 insertions(+)

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index e23290ffdc77..6c44cbbb2cda 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -826,12 +826,14 @@ enum nft_meta_keys {
  * @NFT_RT_NEXTHOP4: routing nexthop for IPv4
  * @NFT_RT_NEXTHOP6: routing nexthop for IPv6
  * @NFT_RT_TCPMSS: fetch current path tcp mss
+ * @NFT_RT_XFRM: boolean, skb->dst->xfrm != NULL
  */
 enum nft_rt_keys {
 	NFT_RT_CLASSID,
 	NFT_RT_NEXTHOP4,
 	NFT_RT_NEXTHOP6,
 	NFT_RT_TCPMSS,
+	NFT_RT_XFRM,
 	__NFT_RT_MAX
 };
 #define NFT_RT_MAX		(__NFT_RT_MAX - 1)
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 76dba9f6b6f6..f35fa33913ae 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -90,6 +90,11 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
 	case NFT_RT_TCPMSS:
 		nft_reg_store16(dest, get_tcpmss(pkt, dst));
 		break;
+#ifdef CONFIG_XFRM
+	case NFT_RT_XFRM:
+		nft_reg_store8(dest, !!dst->xfrm);
+		break;
+#endif
 	default:
 		WARN_ON(1);
 		goto err;
@@ -130,6 +135,11 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
 	case NFT_RT_TCPMSS:
 		len = sizeof(u16);
 		break;
+#ifdef CONFIG_XFRM
+	case NFT_RT_XFRM:
+		len = sizeof(u8);
+		break;
+#endif
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -164,6 +174,7 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
 	case NFT_RT_NEXTHOP4:
 	case NFT_RT_NEXTHOP6:
 	case NFT_RT_CLASSID:
+	case NFT_RT_XFRM:
 		return 0;
 	case NFT_RT_TCPMSS:
 		hooks = (1 << NF_INET_FORWARD) |
-- 
2.11.0

^ permalink raw reply related

* [PATCH 00/31] Netfilter updates for net-next
From: Pablo Neira Ayuso @ 2018-10-08 23:00 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev

Hi David,

The following patchset contains Netfilter updates for your net-next tree:

1) Support for matching on ipsec policy already set in the route, from
   Florian Westphal.

2) Split set destruction into deactivate and destroy phase to make it
   fit better into the transaction infrastructure, also from Florian.
   This includes a patch to warn on imbalance when setting the new
   activate and deactivate interfaces.

3) Release transaction list from the workqueue to remove expensive
   synchronize_rcu() from configuration plane path. This speeds up
   configuration plane quite a bit. From Florian Westphal.

4) Add new xfrm/ipsec extension, this new extension allows you to match
   for ipsec tunnel keys such as source and destination address, spi and
   reqid. From Máté Eckl and Florian Westphal.

5) Add secmark support, this includes connsecmark too, patches
   from Christian Göttsche.

6) Allow to specify remaining bytes in xt_quota, from Chenbo Feng.
   One follow up patch to calm a clang warning for this one, from
   Nathan Chancellor.

7) Flush conntrack entries based on layer 3 family, from Kristian Evensen.

8) New revision for cgroups2 to shrink the path field.

9) Get rid of obsolete need_conntrack(), as a result of recent
   demodularization work.

10) Use WARN_ON instead of BUG_ON, from Florian Westphal.

11) Unused exported symbol in nf_nat_ipv4_fn(), from Florian.

12) Remove superfluous check for timeout netlink parser and dump
    functions in layer 4 conntrack helpers.

13) Unnecessary redundant rcu read side locks in NAT redirect,
    from Taehee Yoo.

14) Pass nf_hook_state structure to error handlers, patch from
    Florian Westphal.

15) Remove ->new() interface from layer 4 protocol trackers. Place
    them in the ->packet() interface. From Florian.

16) Place conntrack ->error() handling in the ->packet() interface.
    Patches from Florian Westphal.

17) Remove unused parameter in the pernet initialization path,
    also from Florian.

18) Remove additional parameter to specify layer 3 protocol when
    looking up the protocol tracker. From Florian.

19) Shrink array of layer 4 protocol trackers, from Florian.

20) Check for linear skb only once from the ALG NAT mangling
    codebase, from Taehee Yoo.

21) Use rhashtable_walk_enter() instead of deprecated
    rhashtable_walk_init(), also from Taehee.

22) No need to flush all conntracks when only one single address
    is gone, from Tan Hu.

23) Remove redundant check for NAT flags in flowtable code, from
    Taehee Yoo.

24) Use rhashtable_lookup() instead of rhashtable_lookup_fast()
    from netfilter codebase, since rcu read lock side is already
    assumed in this path.

You can pull these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git

Thanks.

----------------------------------------------------------------

The following changes since commit a82738adff167593bbb9df90b4201ce4b3407d21:

  ip6_gre: simplify gre header parsing in ip6gre_err (2018-09-16 15:32:59 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git HEAD

for you to fetch changes up to ffa0a9a5903e9fcfde71a0200af30692ac223ef7:

  netfilter: xt_quota: Don't use aligned attribute in sizeof (2018-10-09 00:19:25 +0200)

----------------------------------------------------------------
Chenbo Feng (1):
      netfilter: xt_quota: fix the behavior of xt_quota module

Christian Göttsche (2):
      netfilter: nf_tables: add SECMARK support
      netfilter: nf_tables: add requirements for connsecmark support

Florian Westphal (18):
      netfilter: nf_tables: rt: allow checking if dst has xfrm attached
      netfilter: nf_tables: split set destruction in deactivate and destroy phase
      netfilter: nf_tables: warn when expr implements only one of activate/deactivate
      netfilter: nf_tables: asynchronous release
      netfilter: remove obsolete need_conntrack stub
      netfilter: nf_tables: add xfrm expression
      netfilter: nf_tables: avoid BUG_ON usage
      netfilter: xtables: avoid BUG_ON
      netfilter: nf_nat_ipv4: remove obsolete EXPORT_SYMBOL
      netfilter: conntrack: pass nf_hook_state to packet and error handlers
      netfilter: conntrack: remove the l4proto->new() function
      netfilter: conntrack: deconstify packet callback skb pointer
      netfilter: conntrack: avoid using ->error callback if possible
      netfilter: conntrack: remove error callback and handle icmp from core
      netfilter: conntrack: remove unused proto arg from netns init functions
      netfilter: conntrack: remove l3->l4 mapping information
      netfilter: conntrack: clamp l4proto array size at largers supported protocol
      netfilter: ctnetlink: must check mark attributes vs NULL

Kristian Evensen (1):
      netfilter: ctnetlink: Support L3 protocol-filter on flush

Nathan Chancellor (1):
      netfilter: xt_quota: Don't use aligned attribute in sizeof

Pablo Neira Ayuso (2):
      netfilter: xt_cgroup: shrink size of v2 path
      netfilter: cttimeout: remove superfluous check on layer 4 netlink functions

Taehee Yoo (5):
      netfilter: nat: remove unnecessary rcu_read_lock in nf_nat_redirect_ipv{4/6}
      netfilter: nat: remove duplicate skb_is_nonlinear() in __nf_nat_mangle_tcp_packet()
      netfilter: nf_tables: use rhashtable_walk_enter instead of rhashtable_walk_init
      netfilter: nf_flow_table: remove unnecessary nat flag check code
      netfilter: nf_tables: use rhashtable_lookup() instead of rhashtable_lookup_fast()

Tan Hu (1):
      netfilter: masquerade: don't flush all conntracks if only one address deleted on device

 include/linux/netfilter/nf_conntrack_common.h  |   3 -
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h |  13 +-
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h |  13 --
 include/net/netfilter/nf_conntrack_core.h      |   3 +-
 include/net/netfilter/nf_conntrack_l4proto.h   |  36 ++-
 include/net/netfilter/nf_tables.h              |   9 +-
 include/net/netfilter/nf_tables_core.h         |   4 +
 include/uapi/linux/netfilter/nf_tables.h       |  49 ++++-
 include/uapi/linux/netfilter/xt_cgroup.h       |  16 ++
 include/uapi/linux/netfilter/xt_quota.h        |   8 +-
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c       |   1 -
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c    |  22 +-
 net/ipv6/netfilter/ip6t_ipv6header.c           |   5 +-
 net/ipv6/netfilter/ip6t_rt.c                   |  10 +-
 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c    |  19 +-
 net/netfilter/Kconfig                          |   7 +
 net/netfilter/Makefile                         |   1 +
 net/netfilter/nf_conntrack_core.c              | 105 +++++----
 net/netfilter/nf_conntrack_expect.c            |   3 +-
 net/netfilter/nf_conntrack_netlink.c           |  73 +++---
 net/netfilter/nf_conntrack_proto.c             | 117 +++-------
 net/netfilter/nf_conntrack_proto_dccp.c        | 155 +++++--------
 net/netfilter/nf_conntrack_proto_generic.c     |  28 +--
 net/netfilter/nf_conntrack_proto_gre.c         |  44 ++--
 net/netfilter/nf_conntrack_proto_icmp.c        |  78 +++----
 net/netfilter/nf_conntrack_proto_icmpv6.c      |  80 +++----
 net/netfilter/nf_conntrack_proto_sctp.c        | 253 +++++++++------------
 net/netfilter/nf_conntrack_proto_tcp.c         | 251 +++++++++------------
 net/netfilter/nf_conntrack_proto_udp.c         | 236 +++++++++-----------
 net/netfilter/nf_conntrack_standalone.c        |   9 +-
 net/netfilter/nf_flow_table_core.c             |  41 ++--
 net/netfilter/nf_flow_table_ip.c               |   6 +-
 net/netfilter/nf_nat_helper.c                  |   4 +-
 net/netfilter/nf_nat_redirect.c                |   4 -
 net/netfilter/nf_tables_api.c                  | 120 ++++++++--
 net/netfilter/nf_tables_core.c                 |  28 ++-
 net/netfilter/nfnetlink_cttimeout.c            |  59 ++---
 net/netfilter/nft_cmp.c                        |   6 +-
 net/netfilter/nft_ct.c                         |  22 +-
 net/netfilter/nft_dynset.c                     |  21 +-
 net/netfilter/nft_lookup.c                     |  20 +-
 net/netfilter/nft_meta.c                       | 116 ++++++++++
 net/netfilter/nft_objref.c                     |  20 +-
 net/netfilter/nft_reject.c                     |   6 +-
 net/netfilter/nft_rt.c                         |  11 +
 net/netfilter/nft_set_hash.c                   |  38 +---
 net/netfilter/nft_xfrm.c                       | 293 +++++++++++++++++++++++++
 net/netfilter/xt_CT.c                          |   2 +-
 net/netfilter/xt_IDLETIMER.c                   |   4 -
 net/netfilter/xt_SECMARK.c                     |   2 -
 net/netfilter/xt_cgroup.c                      |  72 ++++++
 net/netfilter/xt_quota.c                       |  55 ++---
 net/openvswitch/conntrack.c                    |   8 +-
 53 files changed, 1555 insertions(+), 1054 deletions(-)
 create mode 100644 net/netfilter/nft_xfrm.c

^ permalink raw reply

* [PATCH] isdn/hisax: amd7930_fn: Remove unnecessary parentheses
From: Nathan Chancellor @ 2018-10-08 22:59 UTC (permalink / raw)
  To: Karsten Keil, David S. Miller; +Cc: netdev, linux-kernel, Nathan Chancellor

Clang warns when multiple sets of parentheses are used for a single
conditional statement.

drivers/isdn/hisax/amd7930_fn.c:628:32: warning: equality comparison
with extraneous parentheses [-Wparentheses-equality]
                if ((cs->dc.amd7930.ph_state == 8)) {
                     ~~~~~~~~~~~~~~~~~~~~~~~~^~~~
drivers/isdn/hisax/amd7930_fn.c:628:32: note: remove extraneous
parentheses around the comparison to silence this warning
                if ((cs->dc.amd7930.ph_state == 8)) {
                    ~                        ^   ~
drivers/isdn/hisax/amd7930_fn.c:628:32: note: use '=' to turn this
equality comparison into an assignment
                if ((cs->dc.amd7930.ph_state == 8)) {
                                             ^~
                                             =
1 warning generated.

Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
---
 drivers/isdn/hisax/amd7930_fn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hisax/amd7930_fn.c b/drivers/isdn/hisax/amd7930_fn.c
index 77debda2221b..6c336366128c 100644
--- a/drivers/isdn/hisax/amd7930_fn.c
+++ b/drivers/isdn/hisax/amd7930_fn.c
@@ -625,7 +625,7 @@ Amd7930_l1hw(struct PStack *st, int pr, void *arg)
 		break;
 	case (HW_RESET | REQUEST):
 		spin_lock_irqsave(&cs->lock, flags);
-		if ((cs->dc.amd7930.ph_state == 8)) {
+		if (cs->dc.amd7930.ph_state == 8) {
 			/* b-channels off, PH-AR cleared
 			 * change to F3 */
 			Amd7930_ph_command(cs, 0x20, "HW_RESET REQUEST"); //LMR1 bit 5
-- 
2.19.0

^ permalink raw reply related

* [PATCH net 07/10] rxrpc: Only take the rwind and mtu values from latest ACK
From: David Howells @ 2018-10-08 22:48 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, pabeni, eric.dumazet, linux-afs, linux-kernel
In-Reply-To: <153903883882.17944.17642727588248415623.stgit@warthog.procyon.org.uk>

Move the out-of-order and duplicate ACK packet check to before the call to
rxrpc_input_ackinfo() so that the receive window size and MTU size are only
checked in the latest ACK packet and don't regress.

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Signed-off-by: David Howells <dhowells@redhat.com>
---

 net/rxrpc/input.c |   19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index af8ce64f4162..04213a65c1ac 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -868,6 +868,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
 				  rxrpc_propose_ack_respond_to_ack);
 	}
 
+	/* Discard any out-of-order or duplicate ACKs. */
+	if (before_eq(sp->hdr.serial, call->acks_latest)) {
+		_debug("discard ACK %d <= %d",
+		       sp->hdr.serial, call->acks_latest);
+		return;
+	}
+	call->acks_latest_ts = skb->tstamp;
+	call->acks_latest = sp->hdr.serial;
+
+	/* Parse rwind and mtu sizes if provided. */
 	ioffset = offset + nr_acks + 3;
 	if (skb->len >= ioffset + sizeof(buf.info)) {
 		if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
@@ -889,15 +899,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
 		return;
 	}
 
-	/* Discard any out-of-order or duplicate ACKs. */
-	if (before_eq(sp->hdr.serial, call->acks_latest)) {
-		_debug("discard ACK %d <= %d",
-		       sp->hdr.serial, call->acks_latest);
-		return;
-	}
-	call->acks_latest_ts = skb->tstamp;
-	call->acks_latest = sp->hdr.serial;
-
 	if (before(hard_ack, call->tx_hard_ack) ||
 	    after(hard_ack, call->tx_top))
 		return rxrpc_proto_abort("AKW", call, 0);

^ permalink raw reply related

* [PATCH net 02/10] rxrpc: Fix the data_ready handler
From: David Howells @ 2018-10-08 22:47 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, pabeni, eric.dumazet, linux-afs, linux-kernel
In-Reply-To: <153903883882.17944.17642727588248415623.stgit@warthog.procyon.org.uk>

Fix the rxrpc_data_ready() function to pick up all packets and to not miss
any.  There are two problems:

 (1) The sk_data_ready pointer on the UDP socket is set *after* it is
     bound.  This means that it's open for business before we're ready to
     dequeue packets and a tiny window exists in which a packet can
     sneak onto the receive queue, but we never know about it.

     Fix this by setting the pointers on the socket prior to binding it.

 (2) skb_recv_udp() will return an error (such as ENETUNREACH) if there was
     an error on the transmission side, even though we set the
     sk_error_report hook.  Because rxrpc_data_ready() returns immediately
     in such a case, it never actually removes its packet from the receive
     queue.

     Fix this by abstracting out the UDP dequeuing and checksumming into a
     separate function that keeps hammering on skb_recv_udp() until it
     returns -EAGAIN, passing the packets extracted to the remainder of the
     function.

and two potential problems:

 (3) It might be possible in some circumstances or in the future for
     packets to be added to the UDP receive queue whilst rxrpc is
     consuming them, so the data_ready() handler might get called
     less often than once per packet.

     Allow for this by fully draining the queue on each call as in (2).

 (4) If a packet fails the checksum check, the code currently returns after
     discarding the packet without checking for more.

     Allow for this by fully draining the queue on each call as in (2).

Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
---

 net/rxrpc/input.c        |   68 ++++++++++++++++++++++++++--------------------
 net/rxrpc/local_object.c |   11 ++++---
 2 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index c5af9955665b..c3114fa66c92 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1121,7 +1121,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
  * shut down and the local endpoint from going away, thus sk_user_data will not
  * be cleared until this function returns.
  */
-void rxrpc_data_ready(struct sock *udp_sk)
+void rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 {
 	struct rxrpc_connection *conn;
 	struct rxrpc_channel *chan;
@@ -1130,39 +1130,11 @@ void rxrpc_data_ready(struct sock *udp_sk)
 	struct rxrpc_local *local = udp_sk->sk_user_data;
 	struct rxrpc_peer *peer = NULL;
 	struct rxrpc_sock *rx = NULL;
-	struct sk_buff *skb;
 	unsigned int channel;
-	int ret, skew = 0;
+	int skew = 0;
 
 	_enter("%p", udp_sk);
 
-	ASSERT(!irqs_disabled());
-
-	skb = skb_recv_udp(udp_sk, 0, 1, &ret);
-	if (!skb) {
-		if (ret == -EAGAIN)
-			return;
-		_debug("UDP socket error %d", ret);
-		return;
-	}
-
-	if (skb->tstamp == 0)
-		skb->tstamp = ktime_get_real();
-
-	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
-
-	_net("recv skb %p", skb);
-
-	/* we'll probably need to checksum it (didn't call sock_recvmsg) */
-	if (skb_checksum_complete(skb)) {
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
-		__UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INERRORS, 0);
-		_leave(" [CSUM failed]");
-		return;
-	}
-
-	__UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INDATAGRAMS, 0);
-
 	/* The UDP protocol already released all skb resources;
 	 * we are free to add our own data there.
 	 */
@@ -1181,6 +1153,8 @@ void rxrpc_data_ready(struct sock *udp_sk)
 		}
 	}
 
+	if (skb->tstamp == 0)
+		skb->tstamp = ktime_get_real();
 	trace_rxrpc_rx_packet(sp);
 
 	switch (sp->hdr.type) {
@@ -1398,3 +1372,37 @@ void rxrpc_data_ready(struct sock *udp_sk)
 	rxrpc_reject_packet(local, skb);
 	_leave(" [badmsg]");
 }
+
+void rxrpc_data_ready(struct sock *udp_sk)
+{
+	struct sk_buff *skb;
+	int ret;
+
+	for (;;) {
+		skb = skb_recv_udp(udp_sk, 0, 1, &ret);
+		if (!skb) {
+			if (ret == -EAGAIN)
+				return;
+
+			/* If there was a transmission failure, we get an error
+			 * here that we need to ignore.
+			 */
+			_debug("UDP socket error %d", ret);
+			continue;
+		}
+
+		rxrpc_new_skb(skb, rxrpc_skb_rx_received);
+
+		/* we'll probably need to checksum it (didn't call sock_recvmsg) */
+		if (skb_checksum_complete(skb)) {
+			rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+			__UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INERRORS, 0);
+			_debug("csum failed");
+			continue;
+		}
+
+		__UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INDATAGRAMS, 0);
+
+		rxrpc_input_packet(udp_sk, skb);
+	}
+}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 94d234e9c685..30862f44c9f1 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -122,6 +122,12 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 		return ret;
 	}
 
+	/* set the socket up */
+	sock = local->socket->sk;
+	sock->sk_user_data	= local;
+	sock->sk_data_ready	= rxrpc_data_ready;
+	sock->sk_error_report	= rxrpc_error_report;
+
 	/* if a local address was supplied then bind it */
 	if (local->srx.transport_len > sizeof(sa_family_t)) {
 		_debug("bind");
@@ -191,11 +197,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 		BUG();
 	}
 
-	/* set the socket up */
-	sock = local->socket->sk;
-	sock->sk_user_data	= local;
-	sock->sk_data_ready	= rxrpc_data_ready;
-	sock->sk_error_report	= rxrpc_error_report;
 	_leave(" = 0");
 	return 0;
 

^ permalink raw reply related

* [PATCH net 01/10] rxrpc: Fix some missed refs to init_net
From: David Howells @ 2018-10-08 22:47 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, pabeni, eric.dumazet, linux-afs, linux-kernel
In-Reply-To: <153903883882.17944.17642727588248415623.stgit@warthog.procyon.org.uk>

Fix some refs to init_net that should've been changed to the appropriate
network namespace.

Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing")
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
---

 net/rxrpc/ar-internal.h |   10 ++++++----
 net/rxrpc/call_accept.c |    2 +-
 net/rxrpc/call_object.c |    4 ++--
 net/rxrpc/conn_client.c |   10 ++++++----
 net/rxrpc/input.c       |    4 ++--
 net/rxrpc/peer_object.c |   28 +++++++++++++++++-----------
 6 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ef9554131434..63c43b3a2096 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -891,8 +891,9 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry;
 extern struct idr rxrpc_client_conn_ids;
 
 void rxrpc_destroy_client_conn_ids(void);
-int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
-		       struct sockaddr_rxrpc *, gfp_t);
+int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *,
+		       struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *,
+		       gfp_t);
 void rxrpc_expose_client_call(struct rxrpc_call *);
 void rxrpc_disconnect_client_call(struct rxrpc_call *);
 void rxrpc_put_client_conn(struct rxrpc_connection *);
@@ -1045,10 +1046,11 @@ void rxrpc_peer_keepalive_worker(struct work_struct *);
  */
 struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
 					 const struct sockaddr_rxrpc *);
-struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *,
 				     struct sockaddr_rxrpc *, gfp_t);
 struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
-void rxrpc_new_incoming_peer(struct rxrpc_local *, struct rxrpc_peer *);
+void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *,
+			     struct rxrpc_peer *);
 void rxrpc_destroy_all_peers(struct rxrpc_net *);
 struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
 struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 9c7f26d06a52..f55f67894465 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -287,7 +287,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
 					  (peer_tail + 1) &
 					  (RXRPC_BACKLOG_MAX - 1));
 
-			rxrpc_new_incoming_peer(local, peer);
+			rxrpc_new_incoming_peer(rx, local, peer);
 		}
 
 		/* Now allocate and set up the connection */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 799f75b6900d..0ca2c2dfd196 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -287,7 +287,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 	/* Set up or get a connection record and set the protocol parameters,
 	 * including channel number and call ID.
 	 */
-	ret = rxrpc_connect_call(call, cp, srx, gfp);
+	ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
 	if (ret < 0)
 		goto error;
 
@@ -339,7 +339,7 @@ int rxrpc_retry_client_call(struct rxrpc_sock *rx,
 	/* Set up or get a connection record and set the protocol parameters,
 	 * including channel number and call ID.
 	 */
-	ret = rxrpc_connect_call(call, cp, srx, gfp);
+	ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
 	if (ret < 0)
 		goto error;
 
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 8acf74fe24c0..521189f4b666 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -276,7 +276,8 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
  * If we return with a connection, the call will be on its waiting list.  It's
  * left to the caller to assign a channel and wake up the call.
  */
-static int rxrpc_get_client_conn(struct rxrpc_call *call,
+static int rxrpc_get_client_conn(struct rxrpc_sock *rx,
+				 struct rxrpc_call *call,
 				 struct rxrpc_conn_parameters *cp,
 				 struct sockaddr_rxrpc *srx,
 				 gfp_t gfp)
@@ -289,7 +290,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
 
 	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
 
-	cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
+	cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp);
 	if (!cp->peer)
 		goto error;
 
@@ -683,7 +684,8 @@ static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp)
  * find a connection for a call
  * - called in process context with IRQs enabled
  */
-int rxrpc_connect_call(struct rxrpc_call *call,
+int rxrpc_connect_call(struct rxrpc_sock *rx,
+		       struct rxrpc_call *call,
 		       struct rxrpc_conn_parameters *cp,
 		       struct sockaddr_rxrpc *srx,
 		       gfp_t gfp)
@@ -696,7 +698,7 @@ int rxrpc_connect_call(struct rxrpc_call *call,
 	rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper);
 	rxrpc_cull_active_client_conns(rxnet);
 
-	ret = rxrpc_get_client_conn(call, cp, srx, gfp);
+	ret = rxrpc_get_client_conn(rx, call, cp, srx, gfp);
 	if (ret < 0)
 		goto out;
 
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 800f5b8a1baa..c5af9955665b 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1156,12 +1156,12 @@ void rxrpc_data_ready(struct sock *udp_sk)
 	/* we'll probably need to checksum it (didn't call sock_recvmsg) */
 	if (skb_checksum_complete(skb)) {
 		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
-		__UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0);
+		__UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INERRORS, 0);
 		_leave(" [CSUM failed]");
 		return;
 	}
 
-	__UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
+	__UDP_INC_STATS(sock_net(udp_sk), UDP_MIB_INDATAGRAMS, 0);
 
 	/* The UDP protocol already released all skb resources;
 	 * we are free to add our own data there.
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 01a9febfa367..2d39eaf19620 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -153,8 +153,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
  * assess the MTU size for the network interface through which this peer is
  * reached
  */
-static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
+static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx,
+				  struct rxrpc_peer *peer)
 {
+	struct net *net = sock_net(&rx->sk);
 	struct dst_entry *dst;
 	struct rtable *rt;
 	struct flowi fl;
@@ -169,7 +171,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 	switch (peer->srx.transport.family) {
 	case AF_INET:
 		rt = ip_route_output_ports(
-			&init_net, fl4, NULL,
+			net, fl4, NULL,
 			peer->srx.transport.sin.sin_addr.s_addr, 0,
 			htons(7000), htons(7001), IPPROTO_UDP, 0, 0);
 		if (IS_ERR(rt)) {
@@ -188,7 +190,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 		       sizeof(struct in6_addr));
 		fl6->fl6_dport = htons(7001);
 		fl6->fl6_sport = htons(7000);
-		dst = ip6_route_output(&init_net, NULL, fl6);
+		dst = ip6_route_output(net, NULL, fl6);
 		if (dst->error) {
 			_leave(" [route err %d]", dst->error);
 			return;
@@ -240,10 +242,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
 /*
  * Initialise peer record.
  */
-static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key)
+static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer,
+			    unsigned long hash_key)
 {
 	peer->hash_key = hash_key;
-	rxrpc_assess_MTU_size(peer);
+	rxrpc_assess_MTU_size(rx, peer);
 	peer->mtu = peer->if_mtu;
 	peer->rtt_last_req = ktime_get_real();
 
@@ -275,7 +278,8 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key)
 /*
  * Set up a new peer.
  */
-static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
+static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
+					    struct rxrpc_local *local,
 					    struct sockaddr_rxrpc *srx,
 					    unsigned long hash_key,
 					    gfp_t gfp)
@@ -287,7 +291,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
 	peer = rxrpc_alloc_peer(local, gfp);
 	if (peer) {
 		memcpy(&peer->srx, srx, sizeof(*srx));
-		rxrpc_init_peer(peer, hash_key);
+		rxrpc_init_peer(rx, peer, hash_key);
 	}
 
 	_leave(" = %p", peer);
@@ -299,14 +303,15 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
  * since we've already done a search in the list from the non-reentrant context
  * (the data_ready handler) that is the only place we can add new peers.
  */
-void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer)
+void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local,
+			     struct rxrpc_peer *peer)
 {
 	struct rxrpc_net *rxnet = local->rxnet;
 	unsigned long hash_key;
 
 	hash_key = rxrpc_peer_hash_key(local, &peer->srx);
 	peer->local = local;
-	rxrpc_init_peer(peer, hash_key);
+	rxrpc_init_peer(rx, peer, hash_key);
 
 	spin_lock(&rxnet->peer_hash_lock);
 	hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
@@ -317,7 +322,8 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer)
 /*
  * obtain a remote transport endpoint for the specified address
  */
-struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
+				     struct rxrpc_local *local,
 				     struct sockaddr_rxrpc *srx, gfp_t gfp)
 {
 	struct rxrpc_peer *peer, *candidate;
@@ -337,7 +343,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
 		/* The peer is not yet present in hash - create a candidate
 		 * for a new record and then redo the search.
 		 */
-		candidate = rxrpc_create_peer(local, srx, hash_key, gfp);
+		candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp);
 		if (!candidate) {
 			_leave(" = NULL [nomem]");
 			return NULL;

^ permalink raw reply related

* [PATCH net-next] tcp: refactor DCTCP ECN ACK handling
From: Yuchung Cheng @ 2018-10-08 22:32 UTC (permalink / raw)
  To: davem; +Cc: netdev, edumazet, ncardwell, ysseung, Yuchung Cheng

DCTCP has two parts - a new ECN signalling mechanism and the response
function to it. The first part can be used by other congestion
control modules for DCTCP-ECN deployed networks. This patch moves that
part into a separate tcp_dctcp.h to be used by other congestion control
modules (like how Yeah uses Vegas algorithms). For example, BBR is
currently experimenting with such an ECN signal:
https://tinyurl.com/ietf-102-iccrg-bbr2

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/tcp_dctcp.c | 55 ++++----------------------------------------
 net/ipv4/tcp_dctcp.h | 40 ++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 51 deletions(-)
 create mode 100644 net/ipv4/tcp_dctcp.h

diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index ca61e2a659e7..cd4814f7e962 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -44,6 +44,7 @@
 #include <linux/mm.h>
 #include <net/tcp.h>
 #include <linux/inet_diag.h>
+#include "tcp_dctcp.h"
 
 #define DCTCP_MAX_ALPHA	1024U
 
@@ -118,54 +119,6 @@ static u32 dctcp_ssthresh(struct sock *sk)
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
 }
 
-/* Minimal DCTP CE state machine:
- *
- * S:	0 <- last pkt was non-CE
- *	1 <- last pkt was CE
- */
-
-static void dctcp_ce_state_0_to_1(struct sock *sk)
-{
-	struct dctcp *ca = inet_csk_ca(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (!ca->ce_state) {
-		/* State has changed from CE=0 to CE=1, force an immediate
-		 * ACK to reflect the new CE state. If an ACK was delayed,
-		 * send that first to reflect the prior CE state.
-		 */
-		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-			__tcp_send_ack(sk, ca->prior_rcv_nxt);
-		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
-	}
-
-	ca->prior_rcv_nxt = tp->rcv_nxt;
-	ca->ce_state = 1;
-
-	tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-}
-
-static void dctcp_ce_state_1_to_0(struct sock *sk)
-{
-	struct dctcp *ca = inet_csk_ca(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (ca->ce_state) {
-		/* State has changed from CE=1 to CE=0, force an immediate
-		 * ACK to reflect the new CE state. If an ACK was delayed,
-		 * send that first to reflect the prior CE state.
-		 */
-		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-			__tcp_send_ack(sk, ca->prior_rcv_nxt);
-		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
-	}
-
-	ca->prior_rcv_nxt = tp->rcv_nxt;
-	ca->ce_state = 0;
-
-	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
-}
-
 static void dctcp_update_alpha(struct sock *sk, u32 flags)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
@@ -230,12 +183,12 @@ static void dctcp_state(struct sock *sk, u8 new_state)
 
 static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 {
+	struct dctcp *ca = inet_csk_ca(sk);
+
 	switch (ev) {
 	case CA_EVENT_ECN_IS_CE:
-		dctcp_ce_state_0_to_1(sk);
-		break;
 	case CA_EVENT_ECN_NO_CE:
-		dctcp_ce_state_1_to_0(sk);
+		dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
 		break;
 	default:
 		/* Don't care for the rest. */
diff --git a/net/ipv4/tcp_dctcp.h b/net/ipv4/tcp_dctcp.h
new file mode 100644
index 000000000000..d69a77cbd0c7
--- /dev/null
+++ b/net/ipv4/tcp_dctcp.h
@@ -0,0 +1,40 @@
+#ifndef _TCP_DCTCP_H
+#define _TCP_DCTCP_H
+
+static inline void dctcp_ece_ack_cwr(struct sock *sk, u32 ce_state)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (ce_state == 1)
+		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+	else
+		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+}
+
+/* Minimal DCTCP CE state machine:
+ *
+ * S:	0 <- last pkt was non-CE
+ *	1 <- last pkt was CE
+ */
+static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
+					u32 *prior_rcv_nxt, u32 *ce_state)
+{
+	u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
+
+	if (*ce_state != new_ce_state) {
+		/* CE state has changed, force an immediate ACK to
+		 * reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
+			dctcp_ece_ack_cwr(sk, *ce_state);
+			__tcp_send_ack(sk, *prior_rcv_nxt);
+		}
+		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+	}
+	*prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
+	*ce_state = new_ce_state;
+	dctcp_ece_ack_cwr(sk, new_ce_state);
+}
+
+#endif
-- 
2.19.0.605.g01d371f741-goog

^ permalink raw reply related

* Re: [PATCH net 1/2] net: ipv4: update fnhe_pmtu when first hop's MTU changes
From: Sabrina Dubroca @ 2018-10-08 21:42 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, Stefano Brivio, Ido Schimmel
In-Reply-To: <fa74dac1-bb8e-7892-a48f-1336eda9628f@gmail.com>

2018-10-08, 11:18:49 -0600, David Ahern wrote:
> On 10/8/18 6:36 AM, Sabrina Dubroca wrote:
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index c7861e4b402c..dc9d2668d9bb 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -2458,6 +2458,13 @@ struct netdev_notifier_info {
> >  	struct netlink_ext_ack	*extack;
> >  };
> >  
> > +struct netdev_notifier_info_ext {
> > +	struct netdev_notifier_info info; /* must be first */
> > +	union {
> > +		u32 u32;
> 
> I realize you want this to be generic, but that is a really odd
> definition. can you make that mtu instead? the union allows other use
> cases to add new names.

It might get ugly if we end up with 4 different u32, but ok, I'll
rename this and we can see how it evolves.

-- 
Sabrina

^ permalink raw reply

* Re: [PATCH net-next] net: core: change bool members of struct net_device to bitfield members
From: Heiner Kallweit @ 2018-10-08 21:21 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <20181008142009.7b32df14@xeon-e3>

On 08.10.2018 23:20, Stephen Hemminger wrote:
> On Mon, 8 Oct 2018 22:00:51 +0200
> Heiner Kallweit <hkallweit1@gmail.com> wrote:
> 
>>   *
>> + *	@uc_promisc:	Counter that indicates promiscuous mode
>> + *			has been enabled due to the need to listen to
>> + *			additional unicast addresses in a device that
>> + *			does not implement ndo_set_rx_mode()
>> + *
> 
> I see you just moved the pre-existing comment, but the comment
> looks incorrect. uc_promisc is not a counter but a flag. A counter would
> have more than two states normally.
> 
Right. A v2 fixing the comment has been submitted already.

^ permalink raw reply

* Re: [PATCH net-next] net: core: change bool members of struct net_device to bitfield members
From: Stephen Hemminger @ 2018-10-08 21:20 UTC (permalink / raw)
  To: Heiner Kallweit; +Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <c1d1e04d-693a-b81e-323a-d04b1dcd6544@gmail.com>

On Mon, 8 Oct 2018 22:00:51 +0200
Heiner Kallweit <hkallweit1@gmail.com> wrote:

>   *
> + *	@uc_promisc:	Counter that indicates promiscuous mode
> + *			has been enabled due to the need to listen to
> + *			additional unicast addresses in a device that
> + *			does not implement ndo_set_rx_mode()
> + *

I see you just moved the pre-existing comment, but the comment
looks incorrect. uc_promisc is not a counter but a flag. A counter would
have more than two states normally.

^ permalink raw reply

* Re: [PATCH net 1/2] net: ipv4: update fnhe_pmtu when first hop's MTU changes
From: kbuild test robot @ 2018-10-08 21:16 UTC (permalink / raw)
  To: Sabrina Dubroca; +Cc: kbuild-all, netdev, Stefano Brivio, Sabrina Dubroca
In-Reply-To: <bdb0235df165b1a9684670be3839962c80c9b45a.1539000663.git.sd@queasysnail.net>

[-- Attachment #1: Type: text/plain, Size: 15775 bytes --]

Hi Sabrina,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net/master]

url:    https://github.com/0day-ci/linux/commits/Sabrina-Dubroca/net-ipv4-update-fnhe_pmtu-when-first-hop-s-MTU-changes/20181008-225709
reproduce: make htmldocs

All warnings (new ones prefixed by >>):

   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.retry_failed' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.retry_count' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.lost_packets' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.last_tdls_pkt_time' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.msdu_retries' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.msdu_failed' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.last_ack' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.last_ack_signal' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.ack_signal_filled' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.avg_ack_signal' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'tx_stats.packets' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'tx_stats.bytes' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'tx_stats.last_rate' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'tx_stats.msdu' not described in 'sta_info'
   include/linux/dma-buf.h:304: warning: Function parameter or member 'cb_excl.cb' not described in 'dma_buf'
   include/linux/dma-buf.h:304: warning: Function parameter or member 'cb_excl.poll' not described in 'dma_buf'
   include/linux/dma-buf.h:304: warning: Function parameter or member 'cb_excl.active' not described in 'dma_buf'
   include/linux/dma-buf.h:304: warning: Function parameter or member 'cb_shared.cb' not described in 'dma_buf'
   include/linux/dma-buf.h:304: warning: Function parameter or member 'cb_shared.poll' not described in 'dma_buf'
   include/linux/dma-buf.h:304: warning: Function parameter or member 'cb_shared.active' not described in 'dma_buf'
   include/linux/dma-fence-array.h:54: warning: Function parameter or member 'work' not described in 'dma_fence_array'
   include/linux/gpio/driver.h:142: warning: Function parameter or member 'request_key' not described in 'gpio_irq_chip'
   include/linux/iio/hw-consumer.h:1: warning: no structured comments found
   include/linux/input/sparse-keymap.h:46: warning: Function parameter or member 'sw' not described in 'key_entry'
   drivers/pci/pci.c:218: warning: Excess function parameter 'p' description in 'pci_dev_str_match_path'
   include/linux/regulator/driver.h:227: warning: Function parameter or member 'resume' not described in 'regulator_ops'
   drivers/regulator/core.c:4479: warning: Excess function parameter 'state' description in 'regulator_suspend'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.esw0' not described in 'irb'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.esw1' not described in 'irb'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.esw2' not described in 'irb'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.esw3' not described in 'irb'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.eadm' not described in 'irb'
   drivers/slimbus/stream.c:1: warning: no structured comments found
   drivers/target/target_core_device.c:1: warning: no structured comments found
   drivers/usb/typec/bus.c:1: warning: no structured comments found
   drivers/usb/typec/class.c:1: warning: no structured comments found
   include/linux/w1.h:281: warning: Function parameter or member 'of_match_table' not described in 'w1_family'
   fs/direct-io.c:257: warning: Excess function parameter 'offset' description in 'dio_complete'
   fs/file_table.c:1: warning: no structured comments found
   fs/libfs.c:477: warning: Excess function parameter 'available' description in 'simple_write_end'
   fs/posix_acl.c:646: warning: Function parameter or member 'inode' not described in 'posix_acl_update_mode'
   fs/posix_acl.c:646: warning: Function parameter or member 'mode_p' not described in 'posix_acl_update_mode'
   fs/posix_acl.c:646: warning: Function parameter or member 'acl' not described in 'posix_acl_update_mode'
   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c:183: warning: Function parameter or member 'blockable' not described in 'amdgpu_mn_read_lock'
   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c:254: warning: Function parameter or member 'blockable' not described in 'amdgpu_mn_invalidate_range_start_gfx'
   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c:302: warning: Function parameter or member 'blockable' not described in 'amdgpu_mn_invalidate_range_start_hsa'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:3011: warning: Excess function parameter 'dev' description in 'amdgpu_vm_get_task_info'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:3012: warning: Function parameter or member 'adev' not described in 'amdgpu_vm_get_task_info'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:3012: warning: Excess function parameter 'dev' description in 'amdgpu_vm_get_task_info'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_pin' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_unpin' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_res_obj' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_get_sg_table' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_import_sg_table' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_vmap' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_vunmap' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_mmap' not described in 'drm_driver'
   drivers/gpu/drm/i915/i915_vma.h:49: warning: cannot understand function prototype: 'struct i915_vma '
   drivers/gpu/drm/i915/i915_vma.h:1: warning: no structured comments found
   drivers/gpu/drm/i915/intel_guc_fwif.h:553: warning: cannot understand function prototype: 'struct guc_log_buffer_state '
   drivers/gpu/drm/i915/i915_trace.h:1: warning: no structured comments found
   include/linux/skbuff.h:860: warning: Function parameter or member 'dev_scratch' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'list' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'ip_defrag_offset' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'skb_mstamp' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member '__cloned_offset' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'head_frag' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member '__pkt_type_offset' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'encapsulation' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'encap_hdr_csum' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'csum_valid' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'csum_complete_sw' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'csum_level' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'inner_protocol_type' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'remcsum_offload' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'offload_fwd_mark' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'offload_mr_fwd_mark' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'sender_cpu' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'reserved_tailroom' not described in 'sk_buff'
   include/linux/skbuff.h:860: warning: Function parameter or member 'inner_ipproto' not described in 'sk_buff'
   include/net/sock.h:238: warning: Function parameter or member 'skc_addrpair' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_portpair' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_ipv6only' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_net_refcnt' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_v6_daddr' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_v6_rcv_saddr' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_cookie' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_listener' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_tw_dr' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_rcv_wnd' not described in 'sock_common'
   include/net/sock.h:238: warning: Function parameter or member 'skc_tw_rcv_nxt' not described in 'sock_common'
   include/net/sock.h:509: warning: Function parameter or member 'sk_backlog.rmem_alloc' not described in 'sock'
   include/net/sock.h:509: warning: Function parameter or member 'sk_backlog.len' not described in 'sock'
   include/net/sock.h:509: warning: Function parameter or member 'sk_backlog.head' not described in 'sock'
   include/net/sock.h:509: warning: Function parameter or member 'sk_backlog.tail' not described in 'sock'
   include/net/sock.h:509: warning: Function parameter or member 'sk_wq_raw' not described in 'sock'
   include/net/sock.h:509: warning: Function parameter or member 'tcp_rtx_queue' not described in 'sock'
   include/net/sock.h:509: warning: Function parameter or member 'sk_route_forced_caps' not described in 'sock'
   include/net/sock.h:509: warning: Function parameter or member 'sk_txtime_report_errors' not described in 'sock'
   include/net/sock.h:509: warning: Function parameter or member 'sk_validate_xmit_skb' not described in 'sock'
>> net/core/dev.c:1767: warning: Function parameter or member 'arg' not described in 'call_netdevice_notifiers_u32'
   net/core/dev.c:1767: warning: Excess function parameter 'u32' description in 'call_netdevice_notifiers_u32'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'adj_list.upper' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'adj_list.lower' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'gso_partial_features' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'switchdev_ops' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'l3mdev_ops' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'xfrmdev_ops' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'tlsdev_ops' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'name_assign_type' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'ieee802154_ptr' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'mpls_ptr' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'xdp_prog' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'gro_flush_timeout' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'nf_hooks_ingress' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member '____cacheline_aligned_in_smp' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'qdisc_hash' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'xps_cpus_map' not described in 'net_device'
   include/linux/netdevice.h:2021: warning: Function parameter or member 'xps_rxqs_map' not described in 'net_device'
   include/linux/phylink.h:56: warning: Function parameter or member '__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising' not described in 'phylink_link_state'
   include/linux/phylink.h:56: warning: Function parameter or member '__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising' not described in 'phylink_link_state'
   sound/soc/soc-core.c:2918: warning: Excess function parameter 'legacy_dai_naming' description in 'snd_soc_register_dais'
   Documentation/admin-guide/cgroup-v2.rst:1485: WARNING: Block quote ends without a blank line; unexpected unindent.
   Documentation/admin-guide/cgroup-v2.rst:1487: WARNING: Block quote ends without a blank line; unexpected unindent.
   Documentation/admin-guide/cgroup-v2.rst:1488: WARNING: Block quote ends without a blank line; unexpected unindent.
   Documentation/core-api/boot-time-mm.rst:78: ERROR: Error in "kernel-doc" directive:
   unknown option: "nodocs".

vim +1767 net/core/dev.c

  1754	
  1755	/**
  1756	 *	call_netdevice_notifiers_u32 - call all network notifier blocks
  1757	 *	@val: value passed unmodified to notifier function
  1758	 *      @dev: net_device pointer passed unmodified to notifier function
  1759	 *      @u32: additional u32 argument passed to the notifier function
  1760	 *
  1761	 *	Call all network notifier blocks.  Parameters and return value
  1762	 *	are as for raw_notifier_call_chain().
  1763	 */
  1764	int call_netdevice_notifiers_u32(unsigned long val, struct net_device *dev,
  1765					 u32 arg)
  1766	{
> 1767		struct netdev_notifier_info_ext info = {
  1768			.info.dev = dev,
  1769			.ext.u32 = arg,
  1770		};
  1771	
  1772		BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
  1773	
  1774		return call_netdevice_notifiers_info(val, &info.info);
  1775	}
  1776	EXPORT_SYMBOL(call_netdevice_notifiers_u32);
  1777	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 6570 bytes --]

^ permalink raw reply

* [PATCH net-next] net/ipv6: Make ipv6_route_table_template static
From: David Ahern @ 2018-10-08 21:06 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

From: David Ahern <dsahern@gmail.com>

ipv6_route_table_template is exported but there are no users outside
of route.c. Make it static.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ipv6.h | 2 --
 net/ipv6/route.c   | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ff33f498c137..829650540780 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1089,8 +1089,6 @@ static inline int snmp6_unregister_dev(struct inet6_dev *idev) { return 0; }
 #endif
 
 #ifdef CONFIG_SYSCTL
-extern struct ctl_table ipv6_route_table_template[];
-
 struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
 struct ctl_table *ipv6_route_sysctl_init(struct net *net);
 int ipv6_sysctl_register(void);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7c38e0e058ae..bf4cd647d8b8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5031,7 +5031,7 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
 	return 0;
 }
 
-struct ctl_table ipv6_route_table_template[] = {
+static struct ctl_table ipv6_route_table_template[] = {
 	{
 		.procname	=	"flush",
 		.data		=	&init_net.ipv6.sysctl.flush_delay,
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next] rtnetlink: Update comment in rtnl_stats_dump regarding strict data checking
From: David Ahern @ 2018-10-08 20:58 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

From: David Ahern <dsahern@gmail.com>

The NLM_F_DUMP_PROPER_HDR netlink flag was replaced by a setsockopt.
Update the comment in rtnl_stats_dump.

Fixes: 841891ec0c65 ("rtnetlink: Update rtnl_stats_dump for strict data checking")
Reported-by: Christian Brauner <christian@brauner.io>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/core/rtnetlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c894c4af8981..6406e26171ff 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4775,8 +4775,8 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	ifsm = nlmsg_data(cb->nlh);
 
-	/* only requests using NLM_F_DUMP_PROPER_HDR can pass data to
-	 * influence the dump. The legacy exception is filter_mask.
+	/* only requests using strict checks can pass data to influence
+	 * the dump. The legacy exception is filter_mask.
 	 */
 	if (cb->strict_check) {
 		if (ifsm->pad1 || ifsm->pad2 || ifsm->ifindex) {
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next] rtnetlink: Move ifm in valid_fdb_dump_legacy to closer to use
From: David Ahern @ 2018-10-08 20:57 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

From: David Ahern <dsahern@gmail.com>

Move setting of local variable ifm to after the message parsing in
valid_fdb_dump_legacy. Avoid potential future use of unchecked variable.

Fixes: 8dfbda19a21b ("rtnetlink: Move input checking for rtnl_fdb_dump to helper")
Reported-by: Christian Brauner <christian@brauner.io>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/core/rtnetlink.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6406e26171ff..46328a10034a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3857,7 +3857,6 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh,
 				 int *br_idx, int *brport_idx,
 				 struct netlink_ext_ack *extack)
 {
-	struct ifinfomsg *ifm = nlmsg_data(nlh);
 	struct nlattr *tb[IFLA_MAX+1];
 	int err;
 
@@ -3871,6 +3870,8 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh,
 	if (nlmsg_len(nlh) != sizeof(struct ndmsg) &&
 	    (nlmsg_len(nlh) != sizeof(struct ndmsg) +
 	     nla_attr_size(sizeof(u32)))) {
+		struct ifinfomsg *ifm;
+
 		err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
 				  ifla_policy, extack);
 		if (err < 0) {
@@ -3880,6 +3881,7 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh,
 				*br_idx = nla_get_u32(tb[IFLA_MASTER]);
 		}
 
+		ifm = nlmsg_data(nlh);
 		*brport_idx = ifm->ifi_index;
 	}
 	return 0;
-- 
2.11.0

^ permalink raw reply related

* [PATCH iproute2-next] libnetlink: fix use-after-free of message buf
From: Vlad Buslov @ 2018-10-08 20:52 UTC (permalink / raw)
  To: netdev; +Cc: Vlad Buslov

In __rtnl_talk_iov() main loop, err is a pointer to memory in dynamically
allocated 'buf' that is used to store netlink messages. If netlink message
is an error message, buf is deallocated before returning with error code.
However, on return err->error code is checked one more time to generate
return value, after memory which err points to has already been
freed. Save error code in temporary variable and use the variable to
generate return value.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
 lib/libnetlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index f8b8fbfd0010..bc8338052e17 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -802,6 +802,7 @@ static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov,
 
 			if (h->nlmsg_type == NLMSG_ERROR) {
 				struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+				int error = err->error;
 
 				if (l < sizeof(struct nlmsgerr)) {
 					fprintf(stderr, "ERROR truncated\n");
@@ -825,7 +826,7 @@ static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov,
 				else
 					free(buf);
 
-				return err->error ? -i : 0;
+				return error ? -i : 0;
 			}
 
 			if (answer) {
-- 
2.7.5

^ permalink raw reply related

* [PATCH 2/2] 9p/trans_fd: put worker reqs on destroy
From: Dominique Martinet @ 2018-10-09  4:05 UTC (permalink / raw)
  Cc: Dominique Martinet, v9fs-developer, netdev, linux-kernel,
	Eric Van Hensbergen, Latchesar Ionkov, Tomas Bortoli
In-Reply-To: <1539057956-23741-1-git-send-email-asmadeus@codewreck.org>

From: Dominique Martinet <dominique.martinet@cea.fr>

p9_read_work/p9_write_work might still hold references to a req after
having been cancelled; make sure we put any of these to avoid potential
request leak on disconnect.

Fixes: 728356dedeff8 ("9p: Add refcount to p9_req_t")
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Cc: Tomas Bortoli <tomasbortoli@gmail.com>
---
Noticed we could leak a ref while looking at the syzbot report,
this should be safe enough after the work has been cancelled...
Probably.

 net/9p/trans_fd.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index a0317d459cde..f868cf6fba79 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -876,7 +876,15 @@ static void p9_conn_destroy(struct p9_conn *m)
 
 	p9_mux_poll_stop(m);
 	cancel_work_sync(&m->rq);
+	if (m->rreq) {
+		p9_req_put(m->rreq);
+		m->rreq = NULL;
+	}
 	cancel_work_sync(&m->wq);
+	if (m->wreq) {
+		p9_req_put(m->wreq);
+		m->wreq = NULL;
+	}
 
 	p9_conn_cancel(m, -ECONNRESET);
 
-- 
2.19.1

^ permalink raw reply related

* [PATCH 1/2] 9p/trans_fd: abort p9_read_work if req status changed
From: Dominique Martinet @ 2018-10-09  4:05 UTC (permalink / raw)
  Cc: Dominique Martinet, v9fs-developer, netdev, linux-kernel,
	syzkaller-bugs, Eric Van Hensbergen, Latchesar Ionkov
In-Reply-To: <20181009020949.GA29622@nautica>

From: Dominique Martinet <dominique.martinet@cea.fr>

p9_read_work would try to handle an errored req even if it got put to
error state by another thread between the lookup (that worked) and the
time it had been fully read.
The request itself is safe to use because we hold a ref to it from the
lookup (for m->rreq, so it was safe to read into the request data buffer
until this point), but the req_list has been deleted at the same time
status changed, and client_cb already has been called as well, so we
should not do either.

Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Reported-by: syzbot+2222c34dc40b515f30dc@syzkaller.appspotmail.com
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Latchesar Ionkov <lucho@ionkov.net>
---

As written in reply to the bug report I'm not sure it's what syzbot
complained about exactly, but it feels like a correct thing to do.
The second patch is unrelated to the syzbot report, but something that
occurred to me while looking at this; I'll take both into linux-next
around the start of next week after getting some proper testing done
unless remarks happen.
(they pass basic tests already)

 net/9p/trans_fd.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 12559c474dde..a0317d459cde 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -292,7 +292,6 @@ static void p9_read_work(struct work_struct *work)
 	__poll_t n;
 	int err;
 	struct p9_conn *m;
-	int status = REQ_STATUS_ERROR;
 
 	m = container_of(work, struct p9_conn, rq);
 
@@ -375,11 +374,17 @@ static void p9_read_work(struct work_struct *work)
 		p9_debug(P9_DEBUG_TRANS, "got new packet\n");
 		m->rreq->rc.size = m->rc.offset;
 		spin_lock(&m->client->lock);
-		if (m->rreq->status != REQ_STATUS_ERROR)
-			status = REQ_STATUS_RCVD;
-		list_del(&m->rreq->req_list);
-		/* update req->status while holding client->lock  */
-		p9_client_cb(m->client, m->rreq, status);
+		if (m->rreq->status == REQ_STATUS_SENT) {
+			list_del(&m->rreq->req_list);
+			p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
+		} else {
+			spin_unlock(&m->client->lock);
+			p9_debug(P9_DEBUG_ERROR,
+				 "Request tag %d errored out while we were reading the reply\n",
+				 m->rc.tag);
+			err = -EIO;
+			goto error;
+		}
 		spin_unlock(&m->client->lock);
 		m->rc.sdata = NULL;
 		m->rc.offset = 0;
-- 
2.19.1

^ permalink raw reply related

* Re: [PATCH net-next] net/ipv6: stop leaking percpu memory in fib6 info
From: Mike Rapoport @ 2018-10-09  4:03 UTC (permalink / raw)
  To: David Ahern; +Cc: David S. Miller, netdev, stable
In-Reply-To: <8b36e7c9-0f9b-64c4-f7d6-1d1a92097eaa@gmail.com>

On Mon, Oct 08, 2018 at 12:15:54PM -0600, David Ahern wrote:
> On 10/8/18 6:06 AM, Mike Rapoport wrote:
> > The fib6_info_alloc() function allocates percpu memory to hold per CPU
> > pointers to rt6_info, but this memory is never freed. Fix it.
> > 
> > Fixes: a64efe142f5e ("net/ipv6: introduce fib6_info struct and helpers")
> > 
> > Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
> > Cc: stable@vger.kernel.org
> > ---
> >  net/ipv6/ip6_fib.c | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
> > index cf709eadc932..cc7de7eb8b9c 100644
> > --- a/net/ipv6/ip6_fib.c
> > +++ b/net/ipv6/ip6_fib.c
> > @@ -194,6 +194,8 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
> >  				*ppcpu_rt = NULL;
> >  			}
> >  		}
> > +
> > +		free_percpu(f6i->rt6i_pcpu);
> >  	}
> >  
> >  	lwtstate_put(f6i->fib6_nh.nh_lwtstate);
> > 
> 
> Odd that KMEMLEAK is not detecting this. Thanks for the fix.

There's a comment in kmemleak that says:

	/*
	 * Percpu allocations are only scanned and not reported as leaks
	 * (min_count is set to 0).
	 */

No idea why, though...
 
> Reviewed-by: David Ahern <dsahern@gmail.com>
> 

-- 
Sincerely yours,
Mike.

^ permalink raw reply

* [RFC PATCH 2/2] net/ncsi: Configure multi-package, multi-channel modes with failover
From: Samuel Mendoza-Jonas @ 2018-10-09  3:58 UTC (permalink / raw)
  To: netdev
  Cc: Samuel Mendoza-Jonas, David S . Miller, linux-kernel, openbmc,
	Justin.Lee1
In-Reply-To: <20181009035815.5246-1-sam@mendozajonas.com>

This patch extends the ncsi-netlink interface with two new commands and
three new attributes to configure multiple packages and/or channels at
once, and configure specific failover modes.

NCSI_CMD_SET_PACKAGE_MASK and NCSI_CMD_SET_CHANNEL_MASK set a whitelist
of packages or channels allowed to be configured with the
NCSI_ATTR_PACKAGE_MASK and NCSI_ATTR_CHANNEL_MASK attributes
respectively. If one of these whitelists is set only packages or
channels matching the whitelist are considered for the channel queue in
ncsi_choose_active_channel().

These commands may also use the NCSI_ATTR_MULTI_FLAG to signal that
multiple packages or channels may be configured simultaneously. NCSI
hardware arbitration (HWA) must be available in order to enable
multi-package mode. Multi-channel mode is always available.

If the NCSI_ATTR_CHANNEL_ID attribute is present in the
NCSI_CMD_SET_CHANNEL_MASK command then it sets the preferred channel as
with the NCSI_CMD_SET_INTERFACE command. The combination of preferred
channel and channel whitelist defines a primary channel and the allowed
failover channels.
If the NCSI_ATTR_MULTI_FLAG attribute is also present then the preferred
channel is configured for Tx/Rx and the other channels are enabled only
for Rx.

Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
---
 include/uapi/linux/ncsi.h |  16 +++
 net/ncsi/internal.h       |  11 +-
 net/ncsi/ncsi-aen.c       |   2 +-
 net/ncsi/ncsi-manage.c    | 138 ++++++++++++++++--------
 net/ncsi/ncsi-netlink.c   | 217 +++++++++++++++++++++++++++++++++-----
 net/ncsi/ncsi-rsp.c       |   2 +-
 6 files changed, 312 insertions(+), 74 deletions(-)

diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h
index 4c292ecbb748..035fba1693f9 100644
--- a/include/uapi/linux/ncsi.h
+++ b/include/uapi/linux/ncsi.h
@@ -23,6 +23,13 @@
  *	optionally the preferred NCSI_ATTR_CHANNEL_ID.
  * @NCSI_CMD_CLEAR_INTERFACE: clear any preferred package/channel combination.
  *	Requires NCSI_ATTR_IFINDEX.
+ * @NCSI_CMD_SET_PACKAGE_MASK: set a whitelist of allowed packages.
+ * @NCSI_CMD_SET_PACKAGE_MASK: set a whitelist of allowed channels.
+ *	Requires NCSI_ATTR_IFINDEX and NCSI_ATTR_PACKAGE_MASK.
+ * @NCSI_CMD_SET_CHANNEL_MASK: set a whitelist of allowed channels.
+ *	Requires NCSI_ATTR_IFINDEX, NCSI_ATTR_PACKAGE_ID, and
+ *	NCSI_ATTR_CHANNEL_MASK. If NCSI_ATTR_CHANNEL_ID is present it sets
+ *	the primary channel.
  * @NCSI_CMD_MAX: highest command number
  */
 enum ncsi_nl_commands {
@@ -30,6 +37,8 @@ enum ncsi_nl_commands {
 	NCSI_CMD_PKG_INFO,
 	NCSI_CMD_SET_INTERFACE,
 	NCSI_CMD_CLEAR_INTERFACE,
+	NCSI_CMD_SET_PACKAGE_MASK,
+	NCSI_CMD_SET_CHANNEL_MASK,
 
 	__NCSI_CMD_AFTER_LAST,
 	NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1
@@ -43,6 +52,10 @@ enum ncsi_nl_commands {
  * @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes
  * @NCSI_ATTR_PACKAGE_ID: package ID
  * @NCSI_ATTR_CHANNEL_ID: channel ID
+ * @NCSI_ATTR_MULTI_FLAG: flag to signal that multi-mode should be enabled with
+ *	NCSI_CMD_SET_PACKAGE_MASK or NCSI_CMD_SET_CHANNEL_MASK.
+ * @NCSI_ATTR_PACKAGE_MASK: 32-bit mask of allowed packages.
+ * @NCSI_ATTR_CHANNEL_MASK: 32-bit mask of allowed channels.
  * @NCSI_ATTR_MAX: highest attribute number
  */
 enum ncsi_nl_attrs {
@@ -51,6 +64,9 @@ enum ncsi_nl_attrs {
 	NCSI_ATTR_PACKAGE_LIST,
 	NCSI_ATTR_PACKAGE_ID,
 	NCSI_ATTR_CHANNEL_ID,
+	NCSI_ATTR_MULTI_FLAG,
+	NCSI_ATTR_PACKAGE_MASK,
+	NCSI_ATTR_CHANNEL_MASK,
 
 	__NCSI_ATTR_AFTER_LAST,
 	NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 3d0a33b874f5..8437474d0a78 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -213,6 +213,10 @@ struct ncsi_package {
 	unsigned int         channel_num; /* Number of channels     */
 	struct list_head     channels;    /* List of chanels        */
 	struct list_head     node;        /* Form list of packages  */
+
+	bool                 multi_channel; /* Enable multiple channels  */
+	u32                  channel_whitelist; /* Channels to configure */
+	struct ncsi_channel  *preferred_channel; /* Primary channel      */
 };
 
 struct ncsi_request {
@@ -280,8 +284,6 @@ struct ncsi_dev_priv {
 	unsigned int        package_num;     /* Number of packages         */
 	struct list_head    packages;        /* List of packages           */
 	struct ncsi_channel *hot_channel;    /* Channel was ever active    */
-	struct ncsi_package *force_package;  /* Force a specific package   */
-	struct ncsi_channel *force_channel;  /* Force a specific channel   */
 	struct ncsi_request requests[256];   /* Request table              */
 	unsigned int        request_id;      /* Last used request ID       */
 #define NCSI_REQ_START_IDX	1
@@ -294,6 +296,9 @@ struct ncsi_dev_priv {
 	struct list_head    node;            /* Form NCSI device list      */
 #define NCSI_MAX_VLAN_VIDS	15
 	struct list_head    vlan_vids;       /* List of active VLAN IDs */
+
+	bool                multi_package;   /* Enable multiple packages   */
+	u32                 package_whitelist; /* Packages to configure    */
 };
 
 struct ncsi_cmd_arg {
@@ -345,6 +350,8 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp,
 void ncsi_free_request(struct ncsi_request *nr);
 struct ncsi_dev *ncsi_find_dev(struct net_device *dev);
 int ncsi_process_next_channel(struct ncsi_dev_priv *ndp);
+bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp,
+			  struct ncsi_channel *channel);
 
 /* Packet handlers */
 u32 ncsi_calculate_checksum(unsigned char *data, int len);
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 65f47a648be3..eac56aee30c4 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -86,7 +86,7 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
 	    !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1)))
 		return 0;
 
-	if (state == NCSI_CHANNEL_ACTIVE)
+	if (state == NCSI_CHANNEL_ACTIVE && ncsi_channel_is_last(ndp, nc))
 		ndp->flags |= NCSI_DEV_RESHUFFLE;
 
 	ncsi_stop_channel_monitor(nc);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 665bee25ec44..6a55df700bcb 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -27,6 +27,24 @@
 LIST_HEAD(ncsi_dev_list);
 DEFINE_SPINLOCK(ncsi_dev_lock);
 
+/* Returns true if the given channel is the last channel available */
+bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp,
+			  struct ncsi_channel *channel)
+{
+	struct ncsi_package *np;
+	struct ncsi_channel *nc;
+
+	NCSI_FOR_EACH_PACKAGE(ndp, np)
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			if (nc == channel)
+				continue;
+			if (nc->state == NCSI_CHANNEL_ACTIVE)
+				return false;
+		}
+
+	return true;
+}
+
 static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
@@ -266,6 +284,7 @@ struct ncsi_package *ncsi_add_package(struct ncsi_dev_priv *ndp,
 	np->ndp = ndp;
 	spin_lock_init(&np->lock);
 	INIT_LIST_HEAD(&np->channels);
+	np->channel_whitelist = UINT_MAX;
 
 	spin_lock_irqsave(&ndp->lock, flags);
 	tmp = ncsi_find_package(ndp, id);
@@ -633,6 +652,34 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
 	return 0;
 }
 
+/* Determine if a given channel should be the Tx channel */
+bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc)
+{
+	struct ncsi_package *np = nc->package;
+	struct ncsi_channel *channel;
+	struct ncsi_channel_mode *ncm;
+
+	NCSI_FOR_EACH_CHANNEL(np, channel) {
+		ncm = &channel->modes[NCSI_MODE_TX_ENABLE];
+		/* Another channel is already Tx */
+		if (ncm->enable)
+			return false;
+	}
+
+	if (!np->preferred_channel)
+		return true;
+
+	if (np->preferred_channel == nc)
+		return true;
+
+	/* The preferred channel is not in the queue and not active */
+	if (list_empty(&np->preferred_channel->link) &&
+	    np->preferred_channel->state != NCSI_CHANNEL_ACTIVE)
+		return true;
+
+	return false;
+}
+
 static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
@@ -745,18 +792,22 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 		} else if (nd->state == ncsi_dev_state_config_ebf) {
 			nca.type = NCSI_PKT_CMD_EBF;
 			nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap;
-			nd->state = ncsi_dev_state_config_ecnt;
+			if (ncsi_channel_is_tx(ndp, nc))
+				nd->state = ncsi_dev_state_config_ecnt;
+			else
+				nd->state = ncsi_dev_state_config_ec;
 #if IS_ENABLED(CONFIG_IPV6)
 			if (ndp->inet6_addr_num > 0 &&
 			    (nc->caps[NCSI_CAP_GENERIC].cap &
 			     NCSI_CAP_GENERIC_MC))
 				nd->state = ncsi_dev_state_config_egmf;
-			else
-				nd->state = ncsi_dev_state_config_ecnt;
 		} else if (nd->state == ncsi_dev_state_config_egmf) {
 			nca.type = NCSI_PKT_CMD_EGMF;
 			nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
-			nd->state = ncsi_dev_state_config_ecnt;
+			if (ncsi_channel_is_tx(ndp, nc))
+				nd->state = ncsi_dev_state_config_ecnt;
+			else
+				nd->state = ncsi_dev_state_config_ec;
 #endif /* CONFIG_IPV6 */
 		} else if (nd->state == ncsi_dev_state_config_ecnt) {
 			nca.type = NCSI_PKT_CMD_ECNT;
@@ -840,43 +891,35 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 
 static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
 {
-	struct ncsi_package *np, *force_package;
-	struct ncsi_channel *nc, *found, *hot_nc, *force_channel;
+	struct ncsi_package *np;
+	struct ncsi_channel *nc, *found, *hot_nc;
 	struct ncsi_channel_mode *ncm;
-	unsigned long flags;
+	unsigned long flags, cflags;
+	bool with_link;
 
 	spin_lock_irqsave(&ndp->lock, flags);
 	hot_nc = ndp->hot_channel;
-	force_channel = ndp->force_channel;
-	force_package = ndp->force_package;
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
-	/* Force a specific channel whether or not it has link if we have been
-	 * configured to do so
-	 */
-	if (force_package && force_channel) {
-		found = force_channel;
-		ncm = &found->modes[NCSI_MODE_LINK];
-		if (!(ncm->data[2] & 0x1))
-			netdev_info(ndp->ndev.dev,
-				    "NCSI: Channel %u forced, but it is link down\n",
-				    found->id);
-		goto out;
-	}
-
-	/* The search is done once an inactive channel with up
-	 * link is found.
+	/* By default the search is done once an inactive channel with up
+	 * link is found, unless a preferred channel is set.
+	 * If multi_package or multi_channel are configured all channels in the
+	 * whitelist with link are added to the channel queue.
 	 */
 	found = NULL;
+	with_link = false;
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
-		if (ndp->force_package && np != ndp->force_package)
+		if (!(ndp->package_whitelist & (0x1 << np->id)))
 			continue;
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
-			spin_lock_irqsave(&nc->lock, flags);
+			if (!(np->channel_whitelist & (0x1 << nc->id)))
+				continue;
+
+			spin_lock_irqsave(&nc->lock, cflags);
 
 			if (!list_empty(&nc->link) ||
 			    nc->state != NCSI_CHANNEL_INACTIVE) {
-				spin_unlock_irqrestore(&nc->lock, flags);
+				spin_unlock_irqrestore(&nc->lock, cflags);
 				continue;
 			}
 
@@ -888,32 +931,42 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
 
 			ncm = &nc->modes[NCSI_MODE_LINK];
 			if (ncm->data[2] & 0x1) {
-				spin_unlock_irqrestore(&nc->lock, flags);
 				found = nc;
-				goto out;
+				with_link = true;
+
+				spin_lock_irqsave(&ndp->lock, flags);
+				list_add_tail_rcu(&found->link,
+						  &ndp->channel_queue);
+				spin_unlock_irqrestore(&ndp->lock, flags);
+
+				netdev_dbg(ndp->ndev.dev,
+					   "NCSI: Channel %u added to queue (link %s)\n",
+					   found->id,
+					   ncm->data[2] & 0x1 ? "up" : "down");
 			}
+			spin_unlock_irqrestore(&nc->lock, cflags);
 
-			spin_unlock_irqrestore(&nc->lock, flags);
+			if (with_link && !np->multi_channel)
+				break;
 		}
+		if (with_link && !ndp->multi_package)
+			break;
 	}
 
-	if (!found) {
+	if (!with_link && found) {
+		netdev_info(ndp->ndev.dev,
+			    "NCSI: No channel with link found, configuring channel %u\n",
+			    found->id);
+		spin_lock_irqsave(&ndp->lock, flags);
+		list_add_tail_rcu(&found->link, &ndp->channel_queue);
+		spin_unlock_irqrestore(&ndp->lock, flags);
+	} else if (!found) {
 		netdev_warn(ndp->ndev.dev,
-			    "NCSI: No channel found with link\n");
+			    "NCSI: No channel found to configure!\n");
 		ncsi_report_link(ndp, true);
 		return -ENODEV;
 	}
 
-	ncm = &found->modes[NCSI_MODE_LINK];
-	netdev_dbg(ndp->ndev.dev,
-		   "NCSI: Channel %u added to queue (link %s)\n",
-		   found->id, ncm->data[2] & 0x1 ? "up" : "down");
-
-out:
-	spin_lock_irqsave(&ndp->lock, flags);
-	list_add_tail_rcu(&found->link, &ndp->channel_queue);
-	spin_unlock_irqrestore(&ndp->lock, flags);
-
 	return ncsi_process_next_channel(ndp);
 }
 
@@ -1428,6 +1481,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 	INIT_LIST_HEAD(&ndp->channel_queue);
 	INIT_LIST_HEAD(&ndp->vlan_vids);
 	INIT_WORK(&ndp->work, ncsi_dev_work);
+	ndp->package_whitelist = UINT_MAX;
 
 	/* Initialize private NCSI device */
 	spin_lock_init(&ndp->lock);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 32cb7751d216..33a091e6f466 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -67,7 +67,7 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
 	nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]);
 	if (nc->state == NCSI_CHANNEL_ACTIVE)
 		nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE);
-	if (ndp->force_channel == nc)
+	if (nc == nc->package->preferred_channel)
 		nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
 
 	nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
@@ -112,7 +112,7 @@ static int ncsi_write_package_info(struct sk_buff *skb,
 		if (!pnest)
 			return -ENOMEM;
 		nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
-		if (ndp->force_package == np)
+		if ((0x1 << np->id) == ndp->package_whitelist)
 			nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
 		cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
 		if (!cnest) {
@@ -288,45 +288,54 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
 	package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
 	package = NULL;
 
-	spin_lock_irqsave(&ndp->lock, flags);
-
 	NCSI_FOR_EACH_PACKAGE(ndp, np)
 		if (np->id == package_id)
 			package = np;
 	if (!package) {
 		/* The user has set a package that does not exist */
-		spin_unlock_irqrestore(&ndp->lock, flags);
 		return -ERANGE;
 	}
 
 	channel = NULL;
-	if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
-		/* Allow any channel */
-		channel_id = NCSI_RESERVED_CHANNEL;
-	} else {
+	if (info->attrs[NCSI_ATTR_CHANNEL_ID]) {
 		channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
 		NCSI_FOR_EACH_CHANNEL(package, nc)
-			if (nc->id == channel_id)
+			if (nc->id == channel_id) {
 				channel = nc;
+				break;
+			}
+		if (!channel) {
+			netdev_info(ndp->ndev.dev,
+				    "NCSI: Channel %u does not exist!\n",
+				    channel_id);
+			return -ERANGE;
+		}
 	}
 
-	if (channel_id != NCSI_RESERVED_CHANNEL && !channel) {
-		/* The user has set a channel that does not exist on this
-		 * package
-		 */
-		spin_unlock_irqrestore(&ndp->lock, flags);
-		netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
-			    channel_id);
-		return -ERANGE;
-	}
-
-	ndp->force_package = package;
-	ndp->force_channel = channel;
+	spin_lock_irqsave(&ndp->lock, flags);
+	ndp->package_whitelist = 0x1 << package->id;
+	ndp->multi_package = false;
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
-	netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n",
-		    package_id, channel_id,
-		    channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
+	spin_lock_irqsave(&package->lock, flags);
+	package->multi_channel = false;
+	if (channel) {
+		package->channel_whitelist = 0x1 << channel->id;
+		package->preferred_channel = channel;
+	} else {
+		/* Allow any channel */
+		package->channel_whitelist = UINT_MAX;
+		package->preferred_channel = NULL;
+	}
+	spin_unlock_irqrestore(&package->lock, flags);
+
+	if (channel)
+		netdev_info(ndp->ndev.dev,
+			    "Set package 0x%x, channel 0x%x as preferred\n",
+			    package_id, channel_id);
+	else
+		netdev_info(ndp->ndev.dev, "Set package 0x%x as preferred\n",
+			    package_id);
 
 	/* Bounce the NCSI channel to set changes */
 	ncsi_stop_dev(&ndp->ndev);
@@ -338,6 +347,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
 static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
 {
 	struct ncsi_dev_priv *ndp;
+	struct ncsi_package *np;
 	unsigned long flags;
 
 	if (!info || !info->attrs)
@@ -351,11 +361,19 @@ static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
 	if (!ndp)
 		return -ENODEV;
 
-	/* Clear any override */
+	/* Reset any whitelists and disable multi mode */
 	spin_lock_irqsave(&ndp->lock, flags);
-	ndp->force_package = NULL;
-	ndp->force_channel = NULL;
+	ndp->package_whitelist = UINT_MAX;
+	ndp->multi_package = false;
 	spin_unlock_irqrestore(&ndp->lock, flags);
+
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		spin_lock_irqsave(&np->lock, flags);
+		np->multi_channel = false;
+		np->channel_whitelist = UINT_MAX;
+		np->preferred_channel = NULL;
+		spin_unlock_irqrestore(&np->lock, flags);
+	}
 	netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
 
 	/* Bounce the NCSI channel to set changes */
@@ -365,6 +383,137 @@ static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
 	return 0;
 }
 
+static int ncsi_set_package_mask_nl(struct sk_buff *msg,
+				    struct genl_info *info)
+{
+	struct ncsi_dev_priv *ndp;
+	unsigned long flags;
+	int rc;
+
+	if (!info || !info->attrs)
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_IFINDEX])
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_PACKAGE_MASK])
+		return -EINVAL;
+
+	ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+			       nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+	if (!ndp)
+		return -ENODEV;
+
+	spin_lock_irqsave(&ndp->lock, flags);
+	ndp->package_whitelist =
+		nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_MASK]);
+
+	if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) {
+		if (ndp->flags & NCSI_DEV_HWA) {
+			ndp->multi_package = true;
+			rc = 0;
+		} else {
+			netdev_err(ndp->ndev.dev,
+				   "NCSI: Can't use multiple packages without HWA\n");
+			rc = -EPERM;
+		}
+	} else {
+		rc = 0;
+	}
+
+	spin_unlock_irqrestore(&ndp->lock, flags);
+
+	if (!rc) {
+		/* Bounce the NCSI channel to set changes */
+		ncsi_stop_dev(&ndp->ndev);
+		ncsi_start_dev(&ndp->ndev);
+	}
+
+	return rc;
+}
+
+static int ncsi_set_channel_mask_nl(struct sk_buff *msg,
+				    struct genl_info *info)
+{
+	struct ncsi_package *np, *package;
+	struct ncsi_channel *nc, *channel;
+	struct ncsi_dev_priv *ndp;
+	unsigned long flags;
+	u32 package_id, channel_id;
+
+	if (!info || !info->attrs)
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_IFINDEX])
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_CHANNEL_MASK])
+		return -EINVAL;
+
+	ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+			       nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+	if (!ndp)
+		return -ENODEV;
+
+	package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+	package = NULL;
+	NCSI_FOR_EACH_PACKAGE(ndp, np)
+		if (np->id == package_id) {
+			package = np;
+			break;
+		}
+	if (!package)
+		return -ERANGE;
+
+	spin_lock_irqsave(&package->lock, flags);
+
+	channel = NULL;
+	if (info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+		channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+		NCSI_FOR_EACH_CHANNEL(np, nc)
+			if (nc->id == channel_id) {
+				channel = nc;
+				break;
+			}
+		if (!channel) {
+			spin_unlock_irqrestore(&package->lock, flags);
+			return -ERANGE;
+		}
+		netdev_dbg(ndp->ndev.dev,
+			   "NCSI: Channel %u set as preferred channel\n",
+			   channel->id);
+	}
+
+	package->channel_whitelist =
+		nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_MASK]);
+	if (package->channel_whitelist == 0)
+		netdev_dbg(ndp->ndev.dev,
+			   "NCSI: Package %u set to all channels disabled\n",
+			   package->id);
+
+	package->preferred_channel = channel;
+
+	if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) {
+		package->multi_channel = true;
+		netdev_info(ndp->ndev.dev,
+			    "NCSI: Multi-channel enabled on package %u\n",
+			    package_id);
+	} else {
+		package->multi_channel = false;
+	}
+
+	spin_unlock_irqrestore(&package->lock, flags);
+
+	/* Bounce the NCSI channel to set changes */
+	ncsi_stop_dev(&ndp->ndev);
+	ncsi_start_dev(&ndp->ndev);
+
+	return 0;
+}
+
 static const struct genl_ops ncsi_ops[] = {
 	{
 		.cmd = NCSI_CMD_PKG_INFO,
@@ -385,6 +534,18 @@ static const struct genl_ops ncsi_ops[] = {
 		.doit = ncsi_clear_interface_nl,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NCSI_CMD_SET_PACKAGE_MASK,
+		.policy = ncsi_genl_policy,
+		.doit = ncsi_set_package_mask_nl,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NCSI_CMD_SET_CHANNEL_MASK,
+		.policy = ncsi_genl_policy,
+		.doit = ncsi_set_channel_mask_nl,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static struct genl_family ncsi_genl_family __ro_after_init = {
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index d66b34749027..02ce7626b579 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -241,7 +241,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
 	if (!ncm->enable)
 		return 0;
 
-	ncm->enable = 1;
+	ncm->enable = 0;
 	return 0;
 }
 
-- 
2.19.0

^ permalink raw reply related

* [RFC PATCH 1/2] net/ncsi: Don't enable all channels when HWA available
From: Samuel Mendoza-Jonas @ 2018-10-09  3:58 UTC (permalink / raw)
  To: netdev
  Cc: Samuel Mendoza-Jonas, David S . Miller, linux-kernel, openbmc,
	Justin.Lee1

NCSI hardware arbitration allows multiple packages to be enabled at once
and share the same wiring. If the NCSI driver recognises that HWA is
available it unconditionally enables all packages and channels; but that
is a configuration decision rather than something required by HWA.
Additionally the current implementation will not failover on link events
which can cause connectivity to be lost unless the interface is manually
bounced.

Retain basic HWA support but remove the separate configuration path to
enable all channels, leaving this to be handled by a later
implementation.

Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
---
 net/ncsi/ncsi-aen.c    |  3 +--
 net/ncsi/ncsi-manage.c | 51 +++++++-----------------------------------
 2 files changed, 9 insertions(+), 45 deletions(-)

diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 25e483e8278b..65f47a648be3 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -86,8 +86,7 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
 	    !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1)))
 		return 0;
 
-	if (!(ndp->flags & NCSI_DEV_HWA) &&
-	    state == NCSI_CHANNEL_ACTIVE)
+	if (state == NCSI_CHANNEL_ACTIVE)
 		ndp->flags |= NCSI_DEV_RESHUFFLE;
 
 	ncsi_stop_channel_monitor(nc);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 091284760d21..665bee25ec44 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -112,10 +112,8 @@ static void ncsi_channel_monitor(struct timer_list *t)
 	default:
 		netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
 			   nc->id);
-		if (!(ndp->flags & NCSI_DEV_HWA)) {
-			ncsi_report_link(ndp, true);
-			ndp->flags |= NCSI_DEV_RESHUFFLE;
-		}
+		ncsi_report_link(ndp, true);
+		ndp->flags |= NCSI_DEV_RESHUFFLE;
 
 		ncsi_stop_channel_monitor(nc);
 
@@ -952,35 +950,6 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
 	return false;
 }
 
-static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
-{
-	struct ncsi_package *np;
-	struct ncsi_channel *nc;
-	unsigned long flags;
-
-	/* Move all available channels to processing queue */
-	spin_lock_irqsave(&ndp->lock, flags);
-	NCSI_FOR_EACH_PACKAGE(ndp, np) {
-		NCSI_FOR_EACH_CHANNEL(np, nc) {
-			WARN_ON_ONCE(nc->state != NCSI_CHANNEL_INACTIVE ||
-				     !list_empty(&nc->link));
-			ncsi_stop_channel_monitor(nc);
-			list_add_tail_rcu(&nc->link, &ndp->channel_queue);
-		}
-	}
-	spin_unlock_irqrestore(&ndp->lock, flags);
-
-	/* We can have no channels in extremely case */
-	if (list_empty(&ndp->channel_queue)) {
-		netdev_err(ndp->ndev.dev,
-			   "NCSI: No available channels for HWA\n");
-		ncsi_report_link(ndp, false);
-		return -ENOENT;
-	}
-
-	return ncsi_process_next_channel(ndp);
-}
-
 static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
@@ -1047,6 +1016,10 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
 			ndp->active_package = list_next_entry(
 				ndp->active_package, node);
 
+		/* Check for HWA support */
+		if (ncsi_check_hwa(ndp))
+			netdev_info(ndp->ndev.dev, "NCSI: HWA available\n");
+
 		/* All available packages and channels are enumerated. The
 		 * enumeration happens for once when the NCSI interface is
 		 * started. So we need continue to start the interface after
@@ -1058,10 +1031,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
 		 */
 		if (!ndp->active_package) {
 			ndp->flags |= NCSI_DEV_PROBED;
-			if (ncsi_check_hwa(ndp))
-				ncsi_enable_hwa(ndp);
-			else
-				ncsi_choose_active_channel(ndp);
+			ncsi_choose_active_channel(ndp);
 			return;
 		}
 
@@ -1506,12 +1476,7 @@ int ncsi_start_dev(struct ncsi_dev *nd)
 		return 0;
 	}
 
-	if (ndp->flags & NCSI_DEV_HWA) {
-		netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
-		ret = ncsi_enable_hwa(ndp);
-	} else {
-		ret = ncsi_choose_active_channel(ndp);
-	}
+	ret = ncsi_choose_active_channel(ndp);
 
 	return ret;
 }
-- 
2.19.0

^ permalink raw reply related

* Re: [RFC PATCH v2 bpf-next 0/2] verifier liveness simplification
From: Jiong Wang @ 2018-10-08 20:18 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: Edward Cree, ast, daniel, netdev
In-Reply-To: <cf69190e-93c4-2935-1a8f-23e731003073@netronome.com>

On 03/10/2018 17:53, Jiong Wang wrote:
> On 03/10/2018 16:59, Alexei Starovoitov wrote:
>> On Wed, Oct 03, 2018 at 04:36:31PM +0100, Jiong Wang wrote:
> <snip...>
>>> Now this hasn't happened. I am still debugging the root cause, but kind of
>>> feel
>>> "64-bit" attribute propagation is the issue, it seems to me it can't be
>>> nicely
>>> integrated into the existing register read/write propagation infrastructure.
>>
>> may be share your patches that modify the liveness propagation?
>
> OK, I will share it after some clean up.

I have done more experiments on the algorithm, and finally got something that
passes my local tests. The bpf selftests pass as well, except for several
test_verifier failures which I guess are due to some corner cases; I will have
a look at them later.

In these tests, x86-64 is using the 32-bit information. In the insn loop inside
sanitize_dead_code, if an insn is ALU64 and is not marked as 64-bit, it is
changed to ALU32. With tracing enabled via printk, I could see that quite a few
ALU64 instructions really are rewritten into ALU32, so the fact that tests like
test_l4lb run OK looks like a positive signal of correctness.

The algo separates the problem into two steps.

   - First, assume there is no code path pruning and all code paths are walked
     through. In this case, if we propagate 64-bit info backward along the
     use-def chains for all paths walked, an insn is guaranteed to be marked as
     64-bit correctly. Finishing this step requires building the use-def chain,
     which is done in the following way:

      1. each insn could have two explicit uses, so add two chain fields in
         bpf_insn_aux_data.
      2. we need finer enum to describe register use, so split SRC_OP into
         SRC_OP_0, SRC_OP64_0, SRC_OP_1, SRC_OP64_1 to indicate the source
         is the first/second source and whether it is a 64-bit source.
      3. create the chain at check_reg_arg which is exactly covering all
         register use sites. The function to create the chain is link_reg_to_def.
      4. when creating the chain, if a source is a 64-bit source, the
         information is also propagated backward along the use-def chain
         straight away. This is done in mark_reg_read, which further calls the
         new function "mark_insn_64bit" that does the real job.
         "mark_insn_64bit" fetches the def insn for the 64-bit source, and
         further marks the def insns of its sources as 64-bit. This is repeated
         until the whole use-def chain is consumed.
      5. with the use-def chain described above, if there is no code path
         pruning, an insn is guaranteed to have been marked as 64-bit when its
         result has a 64-bit use.
      6. a helper call causes implicit reg uses, which must conservatively be
         treated as 64-bit uses; a bpf-to-bpf function call has its insns
         connected by use-def chains, so it doesn't need that conservative
         assumption.

   - Second, to handle code path pruning, define the new liveness values
     REG_LIVE_READ64 and REG_LIVE_UNIQUE_WRITTEN. The latter will only be
     set if reg_arg_type is the new U_DST_OP or U_DST_OP_NO_MARK, and
     REG_LIVE_READ64 will be set if a 64-bit read is not screened off by
     REG_LIVE_UNIQUE_WRITTEN.

     The thought is 64-bit use info will only be screened off if the dst register
     is unique in all register operands, meaning not the same as any source. For
     example, insn 18 below will screen off r4 64-bit propagation.

       17: r3 += r7
       18: r4 = 1
       19: *(u64 *)(r10 - 32) = r3
       20: *(u64 *)(r10 - 40) = r4

     So, U_DST_OP/U_DST_OP_NO_MARK have been introduced to differentiate from
     DST_OP/DST_OP_NO_MARK. For check_reg_arg, checks are done on dst_reg, and
     U_DST_* is passed as reg_arg_type when it is unique. U_DST_* then sets
     the liveness to REG_LIVE_UNIQUE_WRITTEN. Inside mark_reg_read, if a
     64-bit read is not screened off by REG_LIVE_UNIQUE_WRITTEN, then
     REG_LIVE_READ64 will be set in the reg_state. REG_LIVE_READ64 further
     triggers propagating downstream 64-bit uses from the pruned paths into the
     current path inside propagate_liveness when path pruning happens.

     Compared with propagating REG_LIVE_READ, propagating REG_LIVE_READ64 needs
     more work, because one 64-bit read could indicate that more than one
     register is 64-bit. For example, insn 19 above shows r3 is a 64-bit source,
     so its definition at insn 17 is 64-bit, and then all sources of insn 17
     must be 64-bit, meaning both r3 and r7 are 64-bit. Therefore,
     REG_LIVE_READ64 needs to be propagated on both r3 and r7 upward along the
     register parentage chain. While walking the def-use chain, we record all
     such affected reg_states, and propagate REG_LIVE_READ64 for all of them.
     This logic is done inside mark_insn_64bit as well.

   - In short, the implementation treats the new 64-bit info (REG_LIVE_READ64)
     propagation the same as REG_LIVE_READ propagation.

     REG_LIVE_READ triggers more path pruning during state equality comparison,
     while REG_LIVE_READ64 triggers 64-bit insn marking during def-use chain
     walking.

     The use-def chain is separate from the reg state parentage chain; the
     former helps the latter: reg states that need REG_LIVE_READ64 propagation
     are collected during use-def chain walking.

Please review the following implementation.

Thanks.

Regards,
Jiong

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b42b60a..ea22f43 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -34,11 +34,15 @@
   * but of the link between it and its parent.  See mark_reg_read() and
   * mark_stack_slot_read() in kernel/bpf/verifier.c.
   */
-enum bpf_reg_liveness {
-	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
-	REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
-	REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
-};
+/* Reg hasn't been read or written this branch. */
+#define REG_LIVE_NONE		0
+/* Reg was read, so we're sensitive to initial value. */
+#define REG_LIVE_READ		0x1
+/* The read is also 64-bit. */
+#define REG_LIVE_READ64		0x2
+#define REG_LIVE_WRITTEN	0x4
+/* The write also should screen off 64-bit backward propagation.  */
+#define REG_LIVE_UNIQUE_WRITTEN	0x8
  
  struct bpf_reg_state {
  	/* Ordering of fields matters.  See states_equal() */
@@ -85,7 +89,11 @@ struct bpf_reg_state {
  	 * pointing to bpf_func_state.
  	 */
  	u32 frameno;
-	enum bpf_reg_liveness live;
+	u32 live;
+	struct {
+		s16 active_def;
+		bool full_ref;
+	};
  };
  
  enum bpf_stack_slot_type {
@@ -145,6 +153,11 @@ struct bpf_insn_aux_data {
  	};
  	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
  	int sanitize_stack_off; /* stack slot to be cleared */
+	struct {
+		s16 def[2]; /* The insns defining the uses of this insn. */
+		bool full_ref; /* This insn should operate on full 64-bit. */
+	};
+	struct bpf_reg_state *reg_state;
  	bool seen; /* this insn was processed by the verifier */
  };
  
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8ccbff4..061345d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -271,8 +271,7 @@ static char slot_type_char[] = {
  	[STACK_ZERO]	= '0',
  };
  
-static void print_liveness(struct bpf_verifier_env *env,
-			   enum bpf_reg_liveness live)
+static void print_liveness(struct bpf_verifier_env *env, u32 live)
  {
  	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
  	    verbose(env, "_");
@@ -755,6 +754,7 @@ static void init_reg_state(struct bpf_verifier_env *env,
  		mark_reg_not_init(env, regs, i);
  		regs[i].live = REG_LIVE_NONE;
  		regs[i].parent = NULL;
+		regs[i].active_def = -1;
  	}
  
  	/* frame pointer */
@@ -779,9 +779,16 @@ static void init_func_state(struct bpf_verifier_env *env,
  }
  
  enum reg_arg_type {
-	SRC_OP,		/* register is used as source operand */
+	SRC_OP_0,	/* register is used as source operand */
+	SRC_OP64_0,	/* likewise, and all 64 bits of the source matter */
+	SRC_OP_1,	/* the second source */
+	SRC_OP64_1,	/* likewise */
+	SRC_OP64_IMP,	/* implicit source, always 64-bit */
+	SRC_OP_SPILL,	/* stack spill */
  	DST_OP,		/* register is used as destination operand */
-	DST_OP_NO_MARK	/* same as above, check only, don't mark */
+	U_DST_OP,	/* likewise, and no overlap with any source */
+	DST_OP_NO_MARK,	/* same as above, check only, don't mark */
+	U_DST_OP_NO_MARK/* likewise */
  };
  
  static int cmp_subprogs(const void *a, const void *b)
@@ -899,14 +906,101 @@ static int check_subprogs(struct bpf_verifier_env *env)
  	return 0;
  }
  
+static void
+link_reg_to_def(struct bpf_verifier_env *env, const struct bpf_reg_state *rstate,
+		enum reg_arg_type read_t, s16 insn_idx)
+{
+	s16 slot_idx, def_idx;
+
+	if (insn_idx == -1)
+		return;
+
+	slot_idx = (read_t - SRC_OP_0) % 2;
+	def_idx = rstate->active_def;
+	env->insn_aux_data[insn_idx].def[slot_idx] = def_idx + 1;
+}
+
+/* Stack of insns to process, this is also used by check_cfg. */
+static int *insn_stack;
+
+struct prop_pair {
+  struct bpf_reg_state *reg_state;
+  u8 regno;
+};
+
+/* Mark insn upward along the chain, also propagate READ64 liveness. */
+static int mark_insn_64bit(struct bpf_verifier_env *env, int insn_idx)
+{
+	struct bpf_insn_aux_data *aux = env->insn_aux_data;
+	u16 u2d0, u2d1, stack_idx = 0, prop_idx = 0;
+	struct prop_pair *prop_stack;
+
+	prop_stack = kcalloc(1024, sizeof(struct prop_pair), GFP_KERNEL);
+	if (!prop_stack)
+		return -ENOMEM;
+
+	insn_stack[stack_idx++] = insn_idx;
+
+	while (stack_idx) {
+		u16 def_idx = insn_stack[--stack_idx];
+
+		if (!def_idx)
+			continue;
+
+		def_idx--;
+
+		aux[def_idx].full_ref = true;
+		u2d0 = aux[def_idx].def[0];
+		if (u2d0) {
+			insn_stack[stack_idx++] = u2d0;
+			prop_stack[prop_idx].reg_state = aux[def_idx].reg_state;
+			prop_stack[prop_idx++].regno =
+				env->prog->insnsi[u2d0 - 1].dst_reg;
+		}
+		u2d1 = aux[def_idx].def[1];
+		if (u2d1) {
+			insn_stack[stack_idx++] = u2d1;
+			prop_stack[prop_idx].reg_state = aux[def_idx].reg_state;
+			prop_stack[prop_idx++].regno =
+				env->prog->insnsi[u2d1 - 1].dst_reg;
+		}
+	}
+
+	while (prop_idx) {
+		struct prop_pair pair = prop_stack[--prop_idx];
+		struct bpf_reg_state *state, *parent;
+		u8 regno = pair.regno;
+
+		state = pair.reg_state + regno;
+		parent = state->parent;
+
+		while (parent) {
+			if (state->live & REG_LIVE_UNIQUE_WRITTEN)
+				break;
+
+			parent->live |= REG_LIVE_READ64;
+			state = parent;
+			parent = state->parent;
+		}
+	}
+
+	kfree(prop_stack);
+
+	return 0;
+}
+
  /* Parentage chain of this register (or stack slot) should take care of all
   * issues like callee-saved registers, stack slot allocation time, etc.
   */
  static int mark_reg_read(struct bpf_verifier_env *env,
  			 const struct bpf_reg_state *state,
-			 struct bpf_reg_state *parent)
+			 struct bpf_reg_state *parent, bool full_reg_read)
  {
  	bool writes = parent == state->parent; /* Observe write marks */
+	const struct bpf_reg_state *orig_state = state;
+	struct bpf_reg_state *orig_parent = parent;
+	bool orig_writes = writes;
+	int def_idx;
  
  	while (parent) {
  		/* if read wasn't screened by an earlier write ... */
@@ -918,11 +1012,20 @@ static int mark_reg_read(struct bpf_verifier_env *env,
  		parent = state->parent;
  		writes = true;
  	}
-	return 0;
+
+	writes = orig_writes;
+	parent = orig_parent;
+	state = orig_state;
+
+	def_idx = writes ? state->active_def : parent->active_def;
+	if (!full_reg_read || def_idx == -1)
+		return 0;
+
+	return mark_insn_64bit(env, def_idx + 1);
  }
  
  static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
-			 enum reg_arg_type t)
+			 enum reg_arg_type t, s16 insn_idx)
  {
  	struct bpf_verifier_state *vstate = env->cur_state;
  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
@@ -933,16 +1036,24 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
  		return -EINVAL;
  	}
  
-	if (t == SRC_OP) {
+	if (t == SRC_OP_0 || t == SRC_OP_1 ||
+	    t == SRC_OP64_0 || t == SRC_OP64_1 || t == SRC_OP64_IMP) {
+		struct bpf_reg_state *rstate = regs + regno;
+
  		/* check whether register used as source operand can be read */
-		if (regs[regno].type == NOT_INIT) {
+		if (rstate->type == NOT_INIT) {
  			verbose(env, "R%d !read_ok\n", regno);
  			return -EACCES;
  		}
+
+		link_reg_to_def(env, rstate, t, insn_idx);
+
  		/* We don't need to worry about FP liveness because it's read-only */
  		if (regno != BPF_REG_FP)
-			return mark_reg_read(env, &regs[regno],
-					     regs[regno].parent);
+			return mark_reg_read(env, rstate, rstate->parent,
+					     t == SRC_OP64_0 ||
+					     t == SRC_OP64_1 ||
+					     t == SRC_OP64_IMP);
  	} else {
  		/* check whether register used as dest operand can be written to */
  		if (regno == BPF_REG_FP) {
@@ -950,8 +1061,14 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
  			return -EACCES;
  		}
  		regs[regno].live |= REG_LIVE_WRITTEN;
-		if (t == DST_OP)
+		if (t == U_DST_OP || t == U_DST_OP_NO_MARK)
+			regs[regno].live |= REG_LIVE_UNIQUE_WRITTEN;
+		if (t == DST_OP || t == U_DST_OP)
  			mark_reg_unknown(env, regs, regno);
+		regs[regno].active_def = insn_idx;
+		if (insn_idx != -1) {
+			env->insn_aux_data[insn_idx].reg_state = regs;
+		}
  	}
  	return 0;
  }
@@ -1118,7 +1235,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
  			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
  		}
  		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
-			      reg_state->stack[spi].spilled_ptr.parent);
+			      reg_state->stack[spi].spilled_ptr.parent,
+			      SRC_OP_SPILL);
  		return 0;
  	} else {
  		int zeros = 0;
@@ -1135,7 +1253,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
  			return -EACCES;
  		}
  		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
-			      reg_state->stack[spi].spilled_ptr.parent);
+			      reg_state->stack[spi].spilled_ptr.parent,
+			      SRC_OP_SPILL);
  		if (value_regno >= 0) {
  			if (zeros == size) {
  				/* any size read into register is zero extended,
@@ -1735,23 +1854,26 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
  	return err;
  }
  
-static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
+static int
+check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
  {
+	bool is_xadd_64 = BPF_SIZE(insn->code) == BPF_DW;
+	bool is_xadd_32 = BPF_SIZE(insn->code) == BPF_W;
  	int err;
  
-	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
-	    insn->imm != 0) {
+	if (!(is_xadd_32 || is_xadd_64) || insn->imm != 0) {
  		verbose(env, "BPF_XADD uses reserved fields\n");
  		return -EINVAL;
  	}
  
  	/* check src1 operand */
-	err = check_reg_arg(env, insn->src_reg, SRC_OP);
+	err = check_reg_arg(env, insn->src_reg,
+			    is_xadd_32 ? SRC_OP_0 : SRC_OP64_0, insn_idx);
  	if (err)
  		return err;
  
  	/* check src2 operand */
-	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+	err = check_reg_arg(env, insn->dst_reg, SRC_OP64_1, insn_idx);
  	if (err)
  		return err;
  
@@ -1852,7 +1974,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
  		 * the whole slot to be marked as 'read'
  		 */
  		mark_reg_read(env, &state->stack[spi].spilled_ptr,
-			      state->stack[spi].spilled_ptr.parent);
+			      state->stack[spi].spilled_ptr.parent,
+			      SRC_OP_SPILL);
  	}
  	return update_stack_depth(env, state, off);
  }
@@ -1903,7 +2026,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
  	if (arg_type == ARG_DONTCARE)
  		return 0;
  
-	err = check_reg_arg(env, regno, SRC_OP);
+	err = check_reg_arg(env, regno, SRC_OP64_IMP, -1);
  	if (err)
  		return err;
  
@@ -2320,7 +2443,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
  	/* after the call registers r0 - r5 were scratched */
  	for (i = 0; i < CALLER_SAVED_REGS; i++) {
  		mark_reg_not_init(env, caller->regs, caller_saved[i]);
-		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK, -1);
  	}
  
  	/* only increment it after check_reg_arg() finished */
@@ -2510,7 +2633,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
  	/* reset caller saved regs */
  	for (i = 0; i < CALLER_SAVED_REGS; i++) {
  		mark_reg_not_init(env, regs, caller_saved[i]);
-		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK, -1);
  	}
  
  	/* update return register (already marked as written above) */
@@ -3161,7 +3284,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
  {
  	struct bpf_reg_state *regs = cur_regs(env);
  	u8 opcode = BPF_OP(insn->code);
-	int err;
+	enum reg_arg_type dst_type;
+	int err, insn_idx;
+
+	insn_idx = insn - env->prog->insnsi;
  
  	if (opcode == BPF_END || opcode == BPF_NEG) {
  		if (opcode == BPF_NEG) {
@@ -3181,7 +3307,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
  		}
  
  		/* check src operand */
-		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+		err = check_reg_arg(env, insn->dst_reg, SRC_OP_0, insn_idx);
  		if (err)
  			return err;
  
@@ -3191,8 +3317,9 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
  			return -EACCES;
  		}
  
+		dst_type = insn->dst_reg != insn->src_reg ? U_DST_OP : DST_OP;
  		/* check dest operand */
-		err = check_reg_arg(env, insn->dst_reg, DST_OP);
+		err = check_reg_arg(env, insn->dst_reg, dst_type, insn_idx);
  		if (err)
  			return err;
  
@@ -3205,18 +3332,24 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
  			}
  
  			/* check src operand */
-			err = check_reg_arg(env, insn->src_reg, SRC_OP);
+			err = check_reg_arg(env, insn->src_reg, SRC_OP_0,
+					    insn_idx);
  			if (err)
  				return err;
+
+			dst_type = insn->dst_reg != insn->src_reg ?
+					U_DST_OP_NO_MARK : DST_OP_NO_MARK;
  		} else {
  			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
  				verbose(env, "BPF_MOV uses reserved fields\n");
  				return -EINVAL;
  			}
+
+			dst_type = U_DST_OP_NO_MARK;
  		}
  
  		/* check dest operand, mark as required later */
-		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
+		err = check_reg_arg(env, insn->dst_reg, dst_type, insn_idx);
  		if (err)
  			return err;
  
@@ -3225,8 +3358,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
  				/* case: R1 = R2
  				 * copy register state to dest reg
  				 */
+				u32 live_old = regs[insn->dst_reg].live;
+				s16 def_old = regs[insn->dst_reg].active_def;
+
  				regs[insn->dst_reg] = regs[insn->src_reg];
-				regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
+				regs[insn->dst_reg].live |= live_old;
+				regs[insn->dst_reg].active_def = def_old;
  			} else {
  				/* R1 = (u32) R2 */
  				if (is_pointer_value(env, insn->src_reg)) {
@@ -3266,18 +3403,23 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
  				return -EINVAL;
  			}
  			/* check src1 operand */
-			err = check_reg_arg(env, insn->src_reg, SRC_OP);
+			err = check_reg_arg(env, insn->src_reg, SRC_OP_0,
+					    insn_idx);
  			if (err)
  				return err;
+			dst_type = insn->dst_reg != insn->src_reg ?
+					U_DST_OP_NO_MARK : DST_OP_NO_MARK;
  		} else {
  			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
  				verbose(env, "BPF_ALU uses reserved fields\n");
  				return -EINVAL;
  			}
+
+			dst_type = DST_OP_NO_MARK;
  		}
  
  		/* check src2 operand */
-		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+		err = check_reg_arg(env, insn->dst_reg, SRC_OP_1, insn_idx);
  		if (err)
  			return err;
  
@@ -3303,7 +3445,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
  		}
  
  		/* check dest operand */
-		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
+		err = check_reg_arg(env, insn->dst_reg, dst_type, insn_idx);
  		if (err)
  			return err;
  
@@ -3759,6 +3901,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
  	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
  	struct bpf_reg_state *dst_reg, *other_branch_regs;
  	u8 opcode = BPF_OP(insn->code);
+	int old_insn_idx = *insn_idx;
  	int err;
  
  	if (opcode > BPF_JSLE) {
@@ -3773,7 +3916,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
  		}
  
  		/* check src1 operand */
-		err = check_reg_arg(env, insn->src_reg, SRC_OP);
+		err = check_reg_arg(env, insn->src_reg, SRC_OP64_0,
+				    old_insn_idx);
  		if (err)
  			return err;
  
@@ -3790,7 +3934,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
  	}
  
  	/* check src2 operand */
-	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+	err = check_reg_arg(env, insn->dst_reg, SRC_OP64_1, old_insn_idx);
  	if (err)
  		return err;
  
@@ -3896,7 +4040,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
  		return -EINVAL;
  	}
  
-	err = check_reg_arg(env, insn->dst_reg, DST_OP);
+	err = check_reg_arg(env, insn->dst_reg, U_DST_OP,
+			    insn - env->prog->insnsi);
  	if (err)
  		return err;
  
@@ -3945,9 +4090,9 @@ static bool may_access_skb(enum bpf_prog_type type)
   */
  static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
  {
+	int i, err, insn_idx = insn - env->prog->insnsi;
  	struct bpf_reg_state *regs = cur_regs(env);
  	u8 mode = BPF_MODE(insn->code);
-	int i, err;
  
  	if (!may_access_skb(env->prog->type)) {
  		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
@@ -3979,7 +4124,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
  	}
  
  	/* check whether implicit source operand (register R6) is readable */
-	err = check_reg_arg(env, BPF_REG_6, SRC_OP);
+	err = check_reg_arg(env, BPF_REG_6, SRC_OP64_1, insn_idx);
  	if (err)
  		return err;
  
@@ -3991,7 +4136,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
  
  	if (mode == BPF_IND) {
  		/* check explicit source operand */
-		err = check_reg_arg(env, insn->src_reg, SRC_OP);
+		err = check_reg_arg(env, insn->src_reg, SRC_OP_0, insn_idx);
  		if (err)
  			return err;
  	}
@@ -3999,7 +4144,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
  	/* reset caller saved regs to unreadable */
  	for (i = 0; i < CALLER_SAVED_REGS; i++) {
  		mark_reg_not_init(env, regs, caller_saved[i]);
-		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK, -1);
  	}
  
  	/* mark destination R0 register as readable, since it contains
@@ -4091,7 +4236,6 @@ enum {
  
  #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
  
-static int *insn_stack;	/* stack of insns to process */
  static int cur_stack;	/* current stack index */
  static int *insn_state;
  
@@ -4554,10 +4698,11 @@ static bool states_equal(struct bpf_verifier_env *env,
   */
  static int propagate_liveness(struct bpf_verifier_env *env,
  			      const struct bpf_verifier_state *vstate,
-			      struct bpf_verifier_state *vparent)
+			      struct bpf_verifier_state *vparent, int insn_idx)
  {
-	int i, frame, err = 0;
+	struct bpf_reg_state *regs, *parent_regs;
  	struct bpf_func_state *state, *parent;
+	int i, frame, err = 0;
  
  	if (vparent->curframe != vstate->curframe) {
  		WARN(1, "propagate_live: parent frame %d current frame %d\n",
@@ -4566,13 +4711,24 @@ static int propagate_liveness(struct bpf_verifier_env *env,
  	}
  	/* Propagate read liveness of registers... */
  	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
+	parent_regs = vparent->frame[vparent->curframe]->regs;
+	regs = vstate->frame[vparent->curframe]->regs;
  	/* We don't need to worry about FP liveness because it's read-only */
  	for (i = 0; i < BPF_REG_FP; i++) {
-		if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
+		if (!(parent_regs[i].live & REG_LIVE_READ64) &&
+		    regs[i].live & REG_LIVE_READ64) {
+			err = mark_reg_read(env, &regs[i], &parent_regs[i],
+					    true);
+			if (err)
+				return err;
  			continue;
-		if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
-			err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
-					    &vparent->frame[vstate->curframe]->regs[i]);
+		}
+
+		if (parent_regs[i].live & REG_LIVE_READ)
+			continue;
+		if (regs[i].live & REG_LIVE_READ) {
+			err = mark_reg_read(env, &regs[i], &parent_regs[i],
+					    false);
  			if (err)
  				return err;
  		}
@@ -4588,7 +4744,8 @@ static int propagate_liveness(struct bpf_verifier_env *env,
  				continue;
  			if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
  				mark_reg_read(env, &state->stack[i].spilled_ptr,
-					      &parent->stack[i].spilled_ptr);
+					      &parent->stack[i].spilled_ptr,
+					      false);
  		}
  	}
  	return err;
@@ -4620,7 +4777,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
  			 * they'll be immediately forgotten as we're pruning
  			 * this state and will pop a new one.
  			 */
-			err = propagate_liveness(env, &sl->state, cur);
+			err = propagate_liveness(env, &sl->state, cur, insn_idx);
  			if (err)
  				return err;
  			return 1;
@@ -4699,6 +4856,12 @@ static int do_check(struct bpf_verifier_env *env)
  			BPF_MAIN_FUNC /* callsite */,
  			0 /* frameno */,
  			0 /* subprogno, zero == main subprog */);
+
+	/* insn_stack will be used by propagate_64bit_usage. */
+	insn_stack = kcalloc(insn_cnt * 2, sizeof(int), GFP_KERNEL);
+	if (!insn_stack)
+		return -ENOMEM;
+
  	insn_idx = 0;
  	for (;;) {
  		struct bpf_insn *insn;
@@ -4779,11 +4942,13 @@ static int do_check(struct bpf_verifier_env *env)
  			/* check for reserved fields is already done */
  
  			/* check src operand */
-			err = check_reg_arg(env, insn->src_reg, SRC_OP);
+			err = check_reg_arg(env, insn->src_reg, SRC_OP64_0,
+					    insn_idx);
  			if (err)
  				return err;
  
-			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
+			err = check_reg_arg(env, insn->dst_reg,
+					    U_DST_OP_NO_MARK, insn_idx);
  			if (err)
  				return err;
  
@@ -4823,6 +4988,12 @@ static int do_check(struct bpf_verifier_env *env)
  
  		} else if (class == BPF_STX) {
  			enum bpf_reg_type *prev_dst_type, dst_reg_type;
+			enum reg_arg_type source_type;
+
+			/* Let the back-end decide which insn to use according
+			 * to store width.
+			 */
+			env->insn_aux_data[insn_idx].full_ref = true;
  
  			if (BPF_MODE(insn->code) == BPF_XADD) {
  				err = check_xadd(env, insn_idx, insn);
@@ -4832,12 +5003,17 @@ static int do_check(struct bpf_verifier_env *env)
  				continue;
  			}
  
+			source_type = BPF_SIZE(insn->code) == BPF_DW ?
+					SRC_OP64_0 : SRC_OP_0;
+
  			/* check src1 operand */
-			err = check_reg_arg(env, insn->src_reg, SRC_OP);
+			err = check_reg_arg(env, insn->src_reg, source_type,
+					    insn_idx);
  			if (err)
  				return err;
  			/* check src2 operand */
-			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+			err = check_reg_arg(env, insn->dst_reg, SRC_OP64_1,
+					    insn_idx);
  			if (err)
  				return err;
  
@@ -4867,8 +5043,12 @@ static int do_check(struct bpf_verifier_env *env)
  				verbose(env, "BPF_ST uses reserved fields\n");
  				return -EINVAL;
  			}
+
+			env->insn_aux_data[insn_idx].full_ref = true;
+
  			/* check src operand */
-			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+			err = check_reg_arg(env, insn->dst_reg, SRC_OP64_0,
+					    insn_idx);
  			if (err)
  				return err;
  
@@ -4888,6 +5068,12 @@ static int do_check(struct bpf_verifier_env *env)
  		} else if (class == BPF_JMP) {
  			u8 opcode = BPF_OP(insn->code);
  
+			/* TODO: could potentially use range info to check if
+			 * the comparison setting condition could be done on
+			 * 32-bit sub-register.
+			 */
+			env->insn_aux_data[insn_idx].full_ref = true;
+
  			if (opcode == BPF_CALL) {
  				if (BPF_SRC(insn->code) != BPF_K ||
  				    insn->off != 0 ||
@@ -4918,6 +5104,9 @@ static int do_check(struct bpf_verifier_env *env)
  				continue;
  
  			} else if (opcode == BPF_EXIT) {
+				bool is_main_exit;
+				u32 frame_idx;
+
  				if (BPF_SRC(insn->code) != BPF_K ||
  				    insn->imm != 0 ||
  				    insn->src_reg != BPF_REG_0 ||
@@ -4926,7 +5115,8 @@ static int do_check(struct bpf_verifier_env *env)
  					return -EINVAL;
  				}
  
-				if (state->curframe) {
+				frame_idx = state->curframe;
+				if (frame_idx) {
  					/* exit from nested function */
  					prev_insn_idx = insn_idx;
  					err = prepare_func_exit(env, &insn_idx);
@@ -4942,7 +5132,12 @@ static int do_check(struct bpf_verifier_env *env)
  				 * of bpf_exit, which means that program wrote
  				 * something into it earlier
  				 */
-				err = check_reg_arg(env, BPF_REG_0, SRC_OP);
+				is_main_exit =
+					!state->frame[frame_idx]->subprogno;
+				err = check_reg_arg(env, BPF_REG_0,
+						    is_main_exit ?
+							SRC_OP64_0 :SRC_OP_0,
+						    insn_idx);
  				if (err)
  					return err;
  
@@ -5267,6 +5462,13 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
  	int i;
  
  	for (i = 0; i < insn_cnt; i++) {
+		/* Demonstrate the usage of "full_ref" info: we could rewrite
+		 * BPF_ALU64 into BPF_ALU, so that 64-bit architectures
+		 * automatically get more 32-bit sub-register usage.
+		 */
+		if (!aux_data[i].full_ref &&
+		    BPF_CLASS(insn[i].code) == BPF_ALU64)
+			insn[i].code = (insn[i].code & ~0x7) | BPF_ALU;
  		if (aux_data[i].seen)
  			continue;
  		memcpy(insn + i, &trap, sizeof(trap));
@@ -5910,11 +6112,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
  	if (ret < 0)
  		goto skip_full_check;
  
+	insn_stack = NULL;
  	ret = do_check(env);
  	if (env->cur_state) {
  		free_verifier_state(env->cur_state, true);
  		env->cur_state = NULL;
  	}
+	kfree(insn_stack);
  
  skip_full_check:
  	while (!pop_stack(env, NULL, NULL));

^ permalink raw reply related

* [PATCH net-next v2] net: core: change bool members of struct net_device to bitfield members
From: Heiner Kallweit @ 2018-10-08 20:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org

bool is fine as a parameter type or function return type, but when used
for struct members it consumes more memory than needed.
Changing the bool members of struct net_device to bitfield members
reduces the memory footprint of this struct.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
v2:
- Change Counter to Flag in description of uc_promisc
---
 include/linux/netdevice.h | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 76603ee13..3d7b8df2e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1651,10 +1651,6 @@ enum netdev_priv_flags {
  * 	@dev_port:		Used to differentiate devices that share
  * 				the same function
  *	@addr_list_lock:	XXX: need comments on this one
- *	@uc_promisc:		Counter that indicates promiscuous mode
- *				has been enabled due to the need to listen to
- *				additional unicast addresses in a device that
- *				does not implement ndo_set_rx_mode()
  *	@uc:			unicast mac addresses
  *	@mc:			multicast mac addresses
  *	@dev_addrs:		list of device hw addresses
@@ -1714,11 +1710,9 @@ enum netdev_priv_flags {
  *	@link_watch_list:	XXX: need comments on this one
  *
  *	@reg_state:		Register/unregister state machine
- *	@dismantle:		Device is going to be freed
  *	@rtnl_link_state:	This enum represents the phases of creating
  *				a new link
  *
- *	@needs_free_netdev:	Should unregister perform free_netdev?
  *	@priv_destructor:	Called from unregister
  *	@npinfo:		XXX: need comments on this one
  * 	@nd_net:		Network namespace this network device is inside
@@ -1758,6 +1752,15 @@ enum netdev_priv_flags {
  *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
  *	@qdisc_running_key: lockdep class annotating Qdisc->running seqcount
  *
+ *	@uc_promisc:	Flag that indicates promiscuous mode
+ *			has been enabled due to the need to listen to
+ *			additional unicast addresses in a device that
+ *			does not implement ndo_set_rx_mode()
+ *
+ *	@dismantle:	Device is going to be freed
+ *
+ *	@needs_free_netdev:	Should unregister perform free_netdev?
+ *
  *	@proto_down:	protocol port state information can be sent to the
  *			switch driver and used to set the phys state of the
  *			switch port.
@@ -1879,7 +1882,6 @@ struct net_device {
 	unsigned short          dev_port;
 	spinlock_t		addr_list_lock;
 	unsigned char		name_assign_type;
-	bool			uc_promisc;
 	struct netdev_hw_addr_list	uc;
 	struct netdev_hw_addr_list	mc;
 	struct netdev_hw_addr_list	dev_addrs;
@@ -1986,14 +1988,11 @@ struct net_device {
 	       NETREG_DUMMY,		/* dummy device for NAPI poll */
 	} reg_state:8;
 
-	bool dismantle;
-
 	enum {
 		RTNL_LINK_INITIALIZED,
 		RTNL_LINK_INITIALIZING,
 	} rtnl_link_state:16;
 
-	bool needs_free_netdev;
 	void (*priv_destructor)(struct net_device *dev);
 
 #ifdef CONFIG_NETPOLL
@@ -2046,7 +2045,10 @@ struct net_device {
 	struct sfp_bus		*sfp_bus;
 	struct lock_class_key	*qdisc_tx_busylock;
 	struct lock_class_key	*qdisc_running_key;
-	bool			proto_down;
+	unsigned		uc_promisc:1;
+	unsigned		dismantle:1;
+	unsigned		needs_free_netdev:1;
+	unsigned		proto_down:1;
 	unsigned		wol_enabled:1;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
-- 
2.19.1

^ permalink raw reply related

* Re: [PATCH net-next] net: core: change bool members of struct net_device to bitfield members
From: Heiner Kallweit @ 2018-10-08 20:14 UTC (permalink / raw)
  To: Randy Dunlap, David Miller; +Cc: netdev@vger.kernel.org
In-Reply-To: <a34797e3-4330-bb9c-1447-6dcd27b5594e@infradead.org>

On 08.10.2018 22:07, Randy Dunlap wrote:
> On 10/8/18 1:00 PM, Heiner Kallweit wrote:
>> bool is good as parameter type or function return type, but if used
>> for struct members it consumes more memory than needed.
>> Changing the bool members of struct net_device to bitfield members
>> allows to decrease the memory footprint of this struct.
>>
>> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
>> ---
>>  include/linux/netdevice.h | 24 +++++++++++++-----------
>>  1 file changed, 13 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index 76603ee13..3d7b8df2e 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -1651,10 +1651,6 @@ enum netdev_priv_flags {
>>   * 	@dev_port:		Used to differentiate devices that share
>>   * 				the same function
>>   *	@addr_list_lock:	XXX: need comments on this one
>> - *	@uc_promisc:		Counter that indicates promiscuous mode
>> - *				has been enabled due to the need to listen to
>> - *				additional unicast addresses in a device that
>> - *				does not implement ndo_set_rx_mode()
>>   *	@uc:			unicast mac addresses
>>   *	@mc:			multicast mac addresses
>>   *	@dev_addrs:		list of device hw addresses
>> @@ -1714,11 +1710,9 @@ enum netdev_priv_flags {
>>   *	@link_watch_list:	XXX: need comments on this one
>>   *
>>   *	@reg_state:		Register/unregister state machine
>> - *	@dismantle:		Device is going to be freed
>>   *	@rtnl_link_state:	This enum represents the phases of creating
>>   *				a new link
>>   *
>> - *	@needs_free_netdev:	Should unregister perform free_netdev?
>>   *	@priv_destructor:	Called from unregister
>>   *	@npinfo:		XXX: need comments on this one
>>   * 	@nd_net:		Network namespace this network device is inside
>> @@ -1758,6 +1752,15 @@ enum netdev_priv_flags {
>>   *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
>>   *	@qdisc_running_key: lockdep class annotating Qdisc->running seqcount
>>   *
>> + *	@uc_promisc:	Counter that indicates promiscuous mode
>> + *			has been enabled due to the need to listen to
>> + *			additional unicast addresses in a device that
>> + *			does not implement ndo_set_rx_mode()
> 
> Hi,
> 
> I see that all you did is copy/paste that text (above), but I wouldn't call
> a single bit a [1-bit] Counter.
> 
I stumbled across this comment too. Neither a bool member nor a 1-bit
bitfield member should be called a counter. I kept the original comment, 
but I'm totally fine with changing Counter -> Flag and will provide a v2.

> thanks,
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox