Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 12/12] netfilter: xtables: fix build failure from COMPAT_XT_ALIGN outside CONFIG_COMPAT
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

From: Willem de Bruijn <willemb@google.com>

The patch in the Fixes references COMPAT_XT_ALIGN in the definition
of XT_DATA_TO_USER, outside an #ifdef CONFIG_COMPAT block.

Split XT_DATA_TO_USER into separate compat and non compat variants and
define the first inside an CONFIG_COMPAT block.

This simplifies both variants by removing branches inside the macro.

Fixes: 324318f0248c ("netfilter: xtables: zero padding in data_to_user")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d17769599c10..1770c1d9b37f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -296,18 +296,17 @@ int xt_data_to_user(void __user *dst, const void *src,
 }
 EXPORT_SYMBOL_GPL(xt_data_to_user);
 
-#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE)				\
+#define XT_DATA_TO_USER(U, K, TYPE)					\
 	xt_data_to_user(U->data, K->data,				\
 			K->u.kernel.TYPE->usersize,			\
-			C_SIZE ? : K->u.kernel.TYPE->TYPE##size,	\
-			C_SIZE ? COMPAT_XT_ALIGN(C_SIZE) :		\
-				 XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
+			K->u.kernel.TYPE->TYPE##size,			\
+			XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
 
 int xt_match_to_user(const struct xt_entry_match *m,
 		     struct xt_entry_match __user *u)
 {
 	return XT_OBJ_TO_USER(u, m, match, 0) ||
-	       XT_DATA_TO_USER(u, m, match, 0);
+	       XT_DATA_TO_USER(u, m, match);
 }
 EXPORT_SYMBOL_GPL(xt_match_to_user);
 
@@ -315,7 +314,7 @@ int xt_target_to_user(const struct xt_entry_target *t,
 		      struct xt_entry_target __user *u)
 {
 	return XT_OBJ_TO_USER(u, t, target, 0) ||
-	       XT_DATA_TO_USER(u, t, target, 0);
+	       XT_DATA_TO_USER(u, t, target);
 }
 EXPORT_SYMBOL_GPL(xt_target_to_user);
 
@@ -614,6 +613,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
 }
 EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
 
+#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE)			\
+	xt_data_to_user(U->data, K->data,				\
+			K->u.kernel.TYPE->usersize,			\
+			C_SIZE,						\
+			COMPAT_XT_ALIGN(C_SIZE))
+
 int xt_compat_match_to_user(const struct xt_entry_match *m,
 			    void __user **dstptr, unsigned int *size)
 {
@@ -629,7 +634,7 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
 		if (match->compat_to_user((void __user *)cm->data, m->data))
 			return -EFAULT;
 	} else {
-		if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
+		if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
 			return -EFAULT;
 	}
 
@@ -975,7 +980,7 @@ int xt_compat_target_to_user(const struct xt_entry_target *t,
 		if (target->compat_to_user((void __user *)ct->data, t->data))
 			return -EFAULT;
 	} else {
-		if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
+		if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
 			return -EFAULT;
 	}
 
-- 
2.1.4

^ permalink raw reply related

* [PATCH 11/12] ebtables: arpreply: Add the standard target sanity check
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

From: Gao Feng <gfree.wind@vip.163.com>

The info->target comes from userspace and it would be used directly.
So we need to add the sanity check to make sure it is a valid standard
target, although the ebtables tool has already checked it. Kernel needs
to validate anything coming from userspace.

If the target is set as an evil value, it would break the ebtables
and cause a panic. Because the non-standard target is treated as one
offset.

Now add one helper function ebt_invalid_target, and we would replace
the macro INVALID_TARGET later.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h | 5 +++++
 net/bridge/netfilter/ebt_arpreply.c       | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index a30efb437e6d..e0cbf17af780 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
 /* True if the target is not a standard target */
 #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
 
+static inline bool ebt_invalid_target(int target)
+{
+	return (target < -NUM_STANDARD_TARGETS || target >= 0);
+}
+
 #endif
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 5929309beaa1..db85230e49c3 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 	if (e->ethproto != htons(ETH_P_ARP) ||
 	    e->invflags & EBT_IPROTO)
 		return -EINVAL;
+	if (ebt_invalid_target(info->target))
+		return -EINVAL;
+
 	return 0;
 }
 
-- 
2.1.4

^ permalink raw reply related

* [PATCH 06/12] netfilter: xtables: zero padding in data_to_user
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

From: Willem de Bruijn <willemb@google.com>

When looking up an iptables rule, the iptables binary compares the
aligned match and target data (XT_ALIGN). In some cases this can
exceed the actual data size to include padding bytes.

Before commit f77bc5b23fb1 ("iptables: use match, target and data
copy_to_user helpers") the malloc()ed bytes were overwritten by the
kernel with kzalloced contents, zeroing the padding and making the
comparison succeed. After this patch, the kernel copies and clears
only data, leaving the padding bytes undefined.

Extend the clear operation from data size to aligned data size to
include the padding bytes, if any.

Padding bytes can be observed in both match and target, and the bug
triggered, by issuing a rule with match icmp and target ACCEPT:

  iptables -t mangle -A INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT
  iptables -t mangle -D INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT

Fixes: f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers")
Reported-by: Paul Moore <pmoore@redhat.com>
Reported-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 2 +-
 net/bridge/netfilter/ebtables.c    | 9 ++++++---
 net/netfilter/x_tables.c           | 9 ++++++---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index be378cf47fcc..b3044c2c62cb 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m,
 int xt_target_to_user(const struct xt_entry_target *t,
 		      struct xt_entry_target __user *u);
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size);
+		    int usersize, int size, int aligned_size);
 
 void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 				 struct xt_counters_info *info, bool compat);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9ec0c9f908fa..9c6e619f452b 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1373,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name,
 	strlcpy(name, _name, sizeof(name));
 	if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
 	    put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
-	    xt_data_to_user(um + entrysize, data, usersize, datasize))
+	    xt_data_to_user(um + entrysize, data, usersize, datasize,
+			    XT_ALIGN(datasize)))
 		return -EFAULT;
 
 	return 0;
@@ -1658,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
 		if (match->compat_to_user(cm->data, m->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, m->data, match->usersize, msize))
+		if (xt_data_to_user(cm->data, m->data, match->usersize, msize,
+				    COMPAT_XT_ALIGN(msize)))
 			return -EFAULT;
 	}
 
@@ -1687,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t,
 		if (target->compat_to_user(cm->data, t->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize))
+		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize,
+				    COMPAT_XT_ALIGN(tsize)))
 			return -EFAULT;
 	}
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8876b7da6884..d17769599c10 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -283,12 +283,13 @@ static int xt_obj_to_user(u16 __user *psize, u16 size,
 		       &U->u.user.revision, K->u.kernel.TYPE->revision)
 
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size)
+		    int usersize, int size, int aligned_size)
 {
 	usersize = usersize ? : size;
 	if (copy_to_user(dst, src, usersize))
 		return -EFAULT;
-	if (usersize != size && clear_user(dst + usersize, size - usersize))
+	if (usersize != aligned_size &&
+	    clear_user(dst + usersize, aligned_size - usersize))
 		return -EFAULT;
 
 	return 0;
@@ -298,7 +299,9 @@ EXPORT_SYMBOL_GPL(xt_data_to_user);
 #define XT_DATA_TO_USER(U, K, TYPE, C_SIZE)				\
 	xt_data_to_user(U->data, K->data,				\
 			K->u.kernel.TYPE->usersize,			\
-			C_SIZE ? : K->u.kernel.TYPE->TYPE##size)
+			C_SIZE ? : K->u.kernel.TYPE->TYPE##size,	\
+			C_SIZE ? COMPAT_XT_ALIGN(C_SIZE) :		\
+				 XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
 
 int xt_match_to_user(const struct xt_entry_match *m,
 		     struct xt_entry_match __user *u)
-- 
2.1.4

^ permalink raw reply related

* [PATCH 04/12] netfilter: introduce nf_conntrack_helper_put helper function
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

From: Liping Zhang <zlpnobody@gmail.com>

And convert module_put invocation to nf_conntrack_helper_put, this is
prepared for the followup patch, which will add a refcnt for cthelper,
so we can reject the deleting request when cthelper is in use.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_helper.h | 2 ++
 net/netfilter/nf_conntrack_helper.c         | 6 ++++++
 net/netfilter/nft_ct.c                      | 4 ++--
 net/netfilter/xt_CT.c                       | 6 +++---
 net/openvswitch/conntrack.c                 | 4 ++--
 5 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index e04fa7691e5d..c1c12411103a 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -79,6 +79,8 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
 struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name,
 							       u16 l3num,
 							       u8 protonum);
+void nf_conntrack_helper_put(struct nf_conntrack_helper *helper);
+
 void nf_ct_helper_init(struct nf_conntrack_helper *helper,
 		       u16 l3num, u16 protonum, const char *name,
 		       u16 default_port, u16 spec_port, u32 id,
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3a60efa7799b..e17006b6e434 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -181,6 +181,12 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
 
+void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
+{
+	module_put(helper->me);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
+
 struct nf_conn_help *
 nf_ct_helper_ext_add(struct nf_conn *ct,
 		     struct nf_conntrack_helper *helper, gfp_t gfp)
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index a34ceb38fc55..1678e9e75e8e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -826,9 +826,9 @@ static void nft_ct_helper_obj_destroy(struct nft_object *obj)
 	struct nft_ct_helper_obj *priv = nft_obj_data(obj);
 
 	if (priv->helper4)
-		module_put(priv->helper4->me);
+		nf_conntrack_helper_put(priv->helper4);
 	if (priv->helper6)
-		module_put(priv->helper6->me);
+		nf_conntrack_helper_put(priv->helper6);
 }
 
 static void nft_ct_helper_obj_eval(struct nft_object *obj,
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index bb7ad82dcd56..623ef37de886 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
 
 	help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
 	if (help == NULL) {
-		module_put(helper->me);
+		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
 	}
 
@@ -263,7 +263,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 err4:
 	help = nfct_help(ct);
 	if (help)
-		module_put(help->helper->me);
+		nf_conntrack_helper_put(help->helper);
 err3:
 	nf_ct_tmpl_free(ct);
 err2:
@@ -346,7 +346,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
 	if (ct) {
 		help = nfct_help(ct);
 		if (help)
-			module_put(help->helper->me);
+			nf_conntrack_helper_put(help->helper);
 
 		nf_ct_netns_put(par->net, par->family);
 
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index bf602e33c40a..08679ebb3068 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1123,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 
 	help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
 	if (!help) {
-		module_put(helper->me);
+		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
 	}
 
@@ -1584,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a)
 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
 {
 	if (ct_info->helper)
-		module_put(ct_info->helper->me);
+		nf_conntrack_helper_put(ct_info->helper);
 	if (ct_info->ct)
 		nf_ct_tmpl_free(ct_info->ct);
 }
-- 
2.1.4

^ permalink raw reply related

* [PATCH 03/12] netfilter: don't setup nat info for confirmed ct
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

From: Liping Zhang <zlpnobody@gmail.com>

We cannot setup nat info if the ct has been confirmed already, else,
different cpu may race to handle the same ct. In extreme situation,
we may hit the "BUG_ON(nf_nat_initialized(ct, maniptype))" in the
nf_nat_setup_info.

Also running the following commands will easily hit NF_CT_ASSERT in
nf_conntrack_alter_reply:
  # nft flush ruleset
  # ping -c 2 -W 1 1.1.1.111 &
  # nft add table t
  # nft add chain t c {type nat hook postrouting priority 0 \;}
  # nft add rule t c snat to 4.5.6.7
  WARNING: CPU: 1 PID: 10065 at net/netfilter/nf_conntrack_core.c:1472
  nf_conntrack_alter_reply+0x9a/0x1a0 [nf_conntrack]
  [...]
  Call Trace:
   nf_nat_setup_info+0xad/0x840 [nf_nat]
   ? deactivate_slab+0x65d/0x6c0
   nft_nat_eval+0xcd/0x100 [nft_nat]
   nft_do_chain+0xff/0x5d0 [nf_tables]
   ? mark_held_locks+0x6f/0xa0
   ? __local_bh_enable_ip+0x70/0xa0
   ? trace_hardirqs_on_caller+0x11f/0x190
   ? ipt_do_table+0x310/0x610
   ? trace_hardirqs_on+0xd/0x10
   ? __local_bh_enable_ip+0x70/0xa0
   ? ipt_do_table+0x32b/0x610
   ? __lock_acquire+0x2ac/0x1580
   ? ipt_do_table+0x32b/0x610
   nft_nat_do_chain+0x65/0x80 [nft_chain_nat_ipv4]
   nf_nat_ipv4_fn+0x1ae/0x240 [nf_nat_ipv4]
   nf_nat_ipv4_out+0x4a/0xf0 [nf_nat_ipv4]
   nft_nat_ipv4_out+0x15/0x20 [nft_chain_nat_ipv4]
   nf_hook_slow+0x2c/0xf0
   ip_output+0x154/0x270

So for the confirmed ct, just ignore it and return NF_ACCEPT.

Fixes: 9a08ecfe74d7 ("netfilter: don't attach a nat extension by default")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b48d6b5aae8a..ef0be325a0c6 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -409,6 +409,10 @@ nf_nat_setup_info(struct nf_conn *ct,
 {
 	struct nf_conntrack_tuple curr_tuple, new_tuple;

+	/* Can't setup nat info for confirmed ct. */
+	if (nf_ct_is_confirmed(ct))
+		return NF_ACCEPT;
+
 	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
 		     maniptype == NF_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
-- 
2.1.4

^ permalink raw reply related

* [PATCH 02/12] netfilter: ctnetlink: Make some parameters integer to avoid enum mismatch
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

From: Matthias Kaehlcke <mka@chromium.org>

Not all parameters passed to ctnetlink_parse_tuple() and
ctnetlink_exp_dump_tuple() match the enum type in the signatures of these
functions. Since this is intended change the argument type of to be an
unsigned integer value.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dcf561b5c97a..fa752626029e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1007,9 +1007,8 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
 
 static int
 ctnetlink_parse_tuple(const struct nlattr * const cda[],
-		      struct nf_conntrack_tuple *tuple,
-		      enum ctattr_type type, u_int8_t l3num,
-		      struct nf_conntrack_zone *zone)
+		      struct nf_conntrack_tuple *tuple, u32 type,
+		      u_int8_t l3num, struct nf_conntrack_zone *zone)
 {
 	struct nlattr *tb[CTA_TUPLE_MAX+1];
 	int err;
@@ -2447,7 +2446,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = {
 
 static int ctnetlink_exp_dump_tuple(struct sk_buff *skb,
 				    const struct nf_conntrack_tuple *tuple,
-				    enum ctattr_expect type)
+				    u32 type)
 {
 	struct nlattr *nest_parms;
 
-- 
2.1.4

^ permalink raw reply related

* [PATCH 00/12] Netfilter/IPVS fixes for net
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev

Hi David,

The following patchset contains Netfilter/IPVS fixes for your net tree,
they are:

1) When using IPVS in direct-routing mode, normal traffic from the LVS
   host to a back-end server is sometimes incorrectly NATed on the way
   back into the LVS host. Patch to fix this from Julian Anastasov.

2) Calm down clang compilation warning in ctnetlink due to type
   mismatch, from Matthias Kaehlcke.

3) Do not re-setup NAT for conntracks that are already confirmed, this
   is fixing a problem that was introduced in the previous nf-next batch.
   Patch from Liping Zhang.

4) Do not allow conntrack helper removal from userspace cthelper
   infrastructure if already in used. This comes with an initial patch
   to introduce nf_conntrack_helper_put() that is required by this fix.
   From Liping Zhang.

5) Zero the pad when copying data to userspace, otherwise iptables fails
   to remove rules. This is a follow up on the patchset that sorts out
   the internal match/target structure pointer leak to userspace. Patch
   from the same author, Willem de Bruijn. This also comes with a build
   failure when CONFIG_COMPAT is not on, coming in the last patch of
   this series.

6) SYNPROXY crashes with conntrack entries that are created via
   ctnetlink, more specifically via conntrackd state sync. Patch from
   Eric Leblond.

7) RCU safe iteration on set element dumping in nf_tables, from
   Liping Zhang.

8) Missing sanitization of immediate date for the bitwise and cmp
   expressions in nf_tables.

9) Refcounting logic for chain and objects from set elements does not
   integrate into the nf_tables 2-phase commit protocol.

10) Missing sanitization of target verdict in ebtables arpreply target,
    from Gao Feng.

You can pull these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git

Thanks!

----------------------------------------------------------------

The following changes since commit 1c4d5f51a812a82de97beee24f48ed05c65ebda5:

  vmxnet3: ensure that adapter is in proper state during force_close (2017-05-12 12:23:52 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git HEAD

for you to fetch changes up to 751a9c763849f5859cb69ea44b0430d00672f637:

  netfilter: xtables: fix build failure from COMPAT_XT_ALIGN outside CONFIG_COMPAT (2017-05-18 13:10:03 +0200)

----------------------------------------------------------------
Eric Leblond (1):
      netfilter: synproxy: fix conntrackd interaction

Gao Feng (1):
      ebtables: arpreply: Add the standard target sanity check

Julian Anastasov (1):
      ipvs: SNAT packet replies only for NATed connections

Liping Zhang (4):
      netfilter: don't setup nat info for confirmed ct
      netfilter: introduce nf_conntrack_helper_put helper function
      netfilter: nfnl_cthelper: reject del request if helper obj is in use
      netfilter: nf_tables: can't assume lock is acquired when dumping set elems

Matthias Kaehlcke (1):
      netfilter: ctnetlink: Make some parameters integer to avoid enum mismatch

Pablo Neira Ayuso (3):
      Merge tag 'ipvs-fixes-for-v4.12' of http://git.kernel.org/.../horms/ipvs
      netfilter: nf_tables: missing sanitization in data from userspace
      netfilter: nf_tables: revisit chain/object refcounting from elements

Willem de Bruijn (2):
      netfilter: xtables: zero padding in data_to_user
      netfilter: xtables: fix build failure from COMPAT_XT_ALIGN outside CONFIG_COMPAT

 include/linux/netfilter/x_tables.h          |   2 +-
 include/linux/netfilter_bridge/ebtables.h   |   5 +
 include/net/netfilter/nf_conntrack_helper.h |   4 +
 include/net/netfilter/nf_tables.h           |   2 +-
 net/bridge/netfilter/ebt_arpreply.c         |   3 +
 net/bridge/netfilter/ebtables.c             |   9 +-
 net/netfilter/ipvs/ip_vs_core.c             |  19 +++-
 net/netfilter/nf_conntrack_helper.c         |  12 +++
 net/netfilter/nf_conntrack_netlink.c        |  11 +-
 net/netfilter/nf_nat_core.c                 |   4 +
 net/netfilter/nf_tables_api.c               | 160 ++++++++++++++++++++++------
 net/netfilter/nfnetlink_cthelper.c          |  17 +--
 net/netfilter/nft_bitwise.c                 |  19 +++-
 net/netfilter/nft_cmp.c                     |  12 ++-
 net/netfilter/nft_ct.c                      |   4 +-
 net/netfilter/nft_immediate.c               |   5 +-
 net/netfilter/nft_range.c                   |   4 +-
 net/netfilter/nft_set_hash.c                |   2 +-
 net/netfilter/x_tables.c                    |  24 +++--
 net/netfilter/xt_CT.c                       |   6 +-
 net/openvswitch/conntrack.c                 |   4 +-
 21 files changed, 249 insertions(+), 79 deletions(-)

^ permalink raw reply

* [PATCH 10/12] netfilter: nf_tables: revisit chain/object refcounting from elements
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

Andreas reports that the following incremental update using our commit
protocol doesn't work.

 # nft -f incremental-update.nft
 delete element ip filter client_to_any { 10.180.86.22 : goto CIn_1 }
 delete chain ip filter CIn_1
 ... Error: Could not process rule: Device or resource busy

The existing code is not well-integrated into the commit phase protocol,
since element deletions do not result in refcount decrement from the
preparation phase. This results in bogus EBUSY errors like the one
above.

Two new functions come with this patch:

* nft_set_elem_activate() function is used from the abort path, to
  restore the set element refcounting on objects that occurred from
  the preparation phase.

* nft_set_elem_deactivate() that is called from nft_del_setelem() to
  decrement set element refcounting on objects from the preparation
  phase in the commit protocol.

The nft_data_uninit() has been renamed to nft_data_release() since this
function does not uninitialize any data store in the data register,
instead just releases the references to objects. Moreover, a new
function nft_data_hold() has been introduced to be used from
nft_set_elem_activate().

Reported-by: Andreas Schultz <aschultz@tpip.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c     | 82 ++++++++++++++++++++++++++++++++++-----
 net/netfilter/nft_bitwise.c       |  4 +-
 net/netfilter/nft_cmp.c           |  2 +-
 net/netfilter/nft_immediate.c     |  5 ++-
 net/netfilter/nft_range.c         |  4 +-
 6 files changed, 81 insertions(+), 18 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 028faec8fc27..8a8bab8d7b15 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -176,7 +176,7 @@ struct nft_data_desc {
 int nft_data_init(const struct nft_ctx *ctx,
 		  struct nft_data *data, unsigned int size,
 		  struct nft_data_desc *desc, const struct nlattr *nla);
-void nft_data_uninit(const struct nft_data *data, enum nft_data_types type);
+void nft_data_release(const struct nft_data *data, enum nft_data_types type);
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
 		  enum nft_data_types type, unsigned int len);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5f4a4d48b871..da314be0c048 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3627,9 +3627,9 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 {
 	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
 
-	nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
+	nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
-		nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+		nft_data_release(nft_set_ext_data(ext), set->dtype);
 	if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
 		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
@@ -3638,6 +3638,18 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 }
 EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
 
+/* Only called from commit path, nft_set_elem_deactivate() already deals with
+ * the refcounting from the preparation phase.
+ */
+static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
+{
+	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
+	kfree(elem);
+}
+
 static int nft_setelem_parse_flags(const struct nft_set *set,
 				   const struct nlattr *attr, u32 *flags)
 {
@@ -3849,9 +3861,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	kfree(elem.priv);
 err3:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
-		nft_data_uninit(&data, d2.type);
+		nft_data_release(&data, d2.type);
 err2:
-	nft_data_uninit(&elem.key.val, d1.type);
+	nft_data_release(&elem.key.val, d1.type);
 err1:
 	return err;
 }
@@ -3896,6 +3908,53 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 	return err;
 }
 
+/**
+ *	nft_data_hold - hold a nft_data item
+ *
+ *	@data: struct nft_data to release
+ *	@type: type of data
+ *
+ *	Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ *	NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and
+ *	NFT_GOTO verdicts. This function must be called on active data objects
+ *	from the second phase of the commit protocol.
+ */
+static void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+{
+	if (type == NFT_DATA_VERDICT) {
+		switch (data->verdict.code) {
+		case NFT_JUMP:
+		case NFT_GOTO:
+			data->verdict.chain->use++;
+			break;
+		}
+	}
+}
+
+static void nft_set_elem_activate(const struct net *net,
+				  const struct nft_set *set,
+				  struct nft_set_elem *elem)
+{
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		nft_data_hold(nft_set_ext_data(ext), set->dtype);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+		(*nft_set_ext_obj(ext))->use++;
+}
+
+static void nft_set_elem_deactivate(const struct net *net,
+				    const struct nft_set *set,
+				    struct nft_set_elem *elem)
+{
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		nft_data_release(nft_set_ext_data(ext), set->dtype);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+		(*nft_set_ext_obj(ext))->use--;
+}
+
 static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 			   const struct nlattr *attr)
 {
@@ -3961,6 +4020,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	kfree(elem.priv);
 	elem.priv = priv;
 
+	nft_set_elem_deactivate(ctx->net, set, &elem);
+
 	nft_trans_elem(trans) = elem;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
@@ -3970,7 +4031,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 err3:
 	kfree(elem.priv);
 err2:
-	nft_data_uninit(&elem.key.val, desc.type);
+	nft_data_release(&elem.key.val, desc.type);
 err1:
 	return err;
 }
@@ -4777,8 +4838,8 @@ static void nf_tables_commit_release(struct nft_trans *trans)
 		nft_set_destroy(nft_trans_set(trans));
 		break;
 	case NFT_MSG_DELSETELEM:
-		nft_set_elem_destroy(nft_trans_elem_set(trans),
-				     nft_trans_elem(trans).priv, true);
+		nf_tables_set_elem_destroy(nft_trans_elem_set(trans),
+					   nft_trans_elem(trans).priv);
 		break;
 	case NFT_MSG_DELOBJ:
 		nft_obj_destroy(nft_trans_obj(trans));
@@ -5013,6 +5074,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 		case NFT_MSG_DELSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
 
+			nft_set_elem_activate(net, te->set, &te->elem);
 			te->set->ops->activate(net, te->set, &te->elem);
 			te->set->ndeact--;
 
@@ -5498,7 +5560,7 @@ int nft_data_init(const struct nft_ctx *ctx,
 EXPORT_SYMBOL_GPL(nft_data_init);
 
 /**
- *	nft_data_uninit - release a nft_data item
+ *	nft_data_release - release a nft_data item
  *
  *	@data: struct nft_data to release
  *	@type: type of data
@@ -5506,7 +5568,7 @@ EXPORT_SYMBOL_GPL(nft_data_init);
  *	Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
  *	all others need to be released by calling this function.
  */
-void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+void nft_data_release(const struct nft_data *data, enum nft_data_types type)
 {
 	if (type < NFT_DATA_VERDICT)
 		return;
@@ -5517,7 +5579,7 @@ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
 		WARN_ON(1);
 	}
 }
-EXPORT_SYMBOL_GPL(nft_data_uninit);
+EXPORT_SYMBOL_GPL(nft_data_release);
 
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
 		  enum nft_data_types type, unsigned int len)
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 96bd4f325b0f..fff8073e2a56 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -99,9 +99,9 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
 
 	return 0;
 err2:
-	nft_data_uninit(&priv->xor, d2.type);
+	nft_data_release(&priv->xor, d2.type);
 err1:
-	nft_data_uninit(&priv->mask, d1.type);
+	nft_data_release(&priv->mask, d1.type);
 	return err;
 }
 
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 8c9d0fb19118..c2945eb3397c 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -211,7 +211,7 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 
 	return &nft_cmp_ops;
 err1:
-	nft_data_uninit(&data, desc.type);
+	nft_data_release(&data, desc.type);
 	return ERR_PTR(-EINVAL);
 }
 
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 728baf88295a..4717d7796927 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -65,7 +65,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
 	return 0;
 
 err1:
-	nft_data_uninit(&priv->data, desc.type);
+	nft_data_release(&priv->data, desc.type);
 	return err;
 }
 
@@ -73,7 +73,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
 				  const struct nft_expr *expr)
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
-	return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+
+	return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
 }
 
 static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 9edc74eedc10..cedb96c3619f 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -102,9 +102,9 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
 	priv->len = desc_from.len;
 	return 0;
 err2:
-	nft_data_uninit(&priv->data_to, desc_to.type);
+	nft_data_release(&priv->data_to, desc_to.type);
 err1:
-	nft_data_uninit(&priv->data_from, desc_from.type);
+	nft_data_release(&priv->data_from, desc_from.type);
 	return err;
 }
 
-- 
2.1.4


^ permalink raw reply related

* [PATCH 08/12] netfilter: nf_tables: can't assume lock is acquired when dumping set elems
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

From: Liping Zhang <zlpnobody@gmail.com>

When dumping the elements related to a specified set, we may invoke the
nf_tables_dump_set with the NFNL_SUBSYS_NFTABLES lock not acquired. So
we should use the proper rcu operation to avoid race condition, just
like other nft dump operations.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 78 +++++++++++++++++++++++++++++++------------
 net/netfilter/nft_set_hash.c  |  2 +-
 2 files changed, 57 insertions(+), 23 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 559225029740..5f4a4d48b871 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3367,35 +3367,50 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
 	return nf_tables_fill_setelem(args->skb, set, elem);
 }
 
+struct nft_set_dump_ctx {
+	const struct nft_set	*set;
+	struct nft_ctx		ctx;
+};
+
 static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct nft_set_dump_ctx *dump_ctx = cb->data;
 	struct net *net = sock_net(skb->sk);
-	u8 genmask = nft_genmask_cur(net);
+	struct nft_af_info *afi;
+	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_set_dump_args args;
-	struct nft_ctx ctx;
-	struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
+	bool set_found = false;
 	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 	struct nlattr *nest;
 	u32 portid, seq;
-	int event, err;
+	int event;
 
-	err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
-			  NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy,
-			  NULL);
-	if (err < 0)
-		return err;
+	rcu_read_lock();
+	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+		if (afi != dump_ctx->ctx.afi)
+			continue;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
-					 (void *)nla, genmask);
-	if (err < 0)
-		return err;
+		list_for_each_entry_rcu(table, &afi->tables, list) {
+			if (table != dump_ctx->ctx.table)
+				continue;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-				   genmask);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+			list_for_each_entry_rcu(set, &table->sets, list) {
+				if (set == dump_ctx->set) {
+					set_found = true;
+					break;
+				}
+			}
+			break;
+		}
+		break;
+	}
+
+	if (!set_found) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
 
 	event  = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM);
 	portid = NETLINK_CB(cb->skb).portid;
@@ -3407,11 +3422,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 		goto nla_put_failure;
 
 	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = ctx.afi->family;
+	nfmsg->nfgen_family = afi->family;
 	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = htons(ctx.net->nft.base_seq & 0xffff);
+	nfmsg->res_id	    = htons(net->nft.base_seq & 0xffff);
 
-	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
+	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
 		goto nla_put_failure;
 	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
 		goto nla_put_failure;
@@ -3422,12 +3437,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 
 	args.cb			= cb;
 	args.skb		= skb;
-	args.iter.genmask	= nft_genmask_cur(ctx.net);
+	args.iter.genmask	= nft_genmask_cur(net);
 	args.iter.skip		= cb->args[0];
 	args.iter.count		= 0;
 	args.iter.err		= 0;
 	args.iter.fn		= nf_tables_dump_setelem;
-	set->ops->walk(&ctx, set, &args.iter);
+	set->ops->walk(&dump_ctx->ctx, set, &args.iter);
+	rcu_read_unlock();
 
 	nla_nest_end(skb, nest);
 	nlmsg_end(skb, nlh);
@@ -3441,9 +3457,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 
 nla_put_failure:
+	rcu_read_unlock();
 	return -ENOSPC;
 }
 
+static int nf_tables_dump_set_done(struct netlink_callback *cb)
+{
+	kfree(cb->data);
+	return 0;
+}
+
 static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
@@ -3465,7 +3488,18 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nf_tables_dump_set,
+			.done = nf_tables_dump_set_done,
 		};
+		struct nft_set_dump_ctx *dump_ctx;
+
+		dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
+		if (!dump_ctx)
+			return -ENOMEM;
+
+		dump_ctx->set = set;
+		dump_ctx->ctx = ctx;
+
+		c.data = dump_ctx;
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 	return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 8ec086b6b56b..3d3a6df4ce70 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -222,7 +222,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_elem elem;
 	int err;
 
-	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+	err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
 	iter->err = err;
 	if (err)
 		return;
-- 
2.1.4


^ permalink raw reply related

* [PATCH 07/12] netfilter: synproxy: fix conntrackd interaction
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

From: Eric Leblond <eric@regit.org>

This patch fixes the creation of connection tracking entry from
netlink when synproxy is used. It was missing the addition of
the synproxy extension.

This was causing kernel crashes when a conntrack entry created by
conntrackd was used after the switch of traffic from active node
to the passive node.

Signed-off-by: Eric Leblond <eric@regit.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index fa752626029e..9799a50bc604 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,6 +45,8 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_l4proto.h>
@@ -1827,6 +1829,8 @@ ctnetlink_create_conntrack(struct net *net,
 	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 	nf_ct_labels_ext_add(ct);
+	nfct_seqadj_ext_add(ct);
+	nfct_synproxy_ext_add(ct);
 
 	/* we must add conntrack extensions before confirmation. */
 	ct->status |= IPS_CONFIRMED;
-- 
2.1.4


^ permalink raw reply related

* [PATCH 01/12] ipvs: SNAT packet replies only for NATed connections
From: Pablo Neira Ayuso @ 2017-05-19  8:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1495182833-2272-1-git-send-email-pablo@netfilter.org>

From: Julian Anastasov <ja@ssi.bg>

We do not check if packet from real server is for NAT
connection before performing SNAT. This causes problems
for setups that use DR/TUN and allow local clients to
access the real server directly, for example:

- local client in director creates IPVS-DR/TUN connection
CIP->VIP and the request packets are routed to RIP.
Talks are finished but IPVS connection is not expired yet.

- second local client creates non-IPVS connection CIP->RIP
with same reply tuple RIP->CIP and when replies are received
on LOCAL_IN we wrongly assign them for the first client
connection because RIP->CIP matches the reply direction.
As result, IPVS SNATs replies for non-IPVS connections.

The problem is more visible to local UDP clients but in rare
cases it can happen also for TCP or remote clients when the
real server sends the reply traffic via the director.

So, better to be more precise for the reply traffic.
As replies are not expected for DR/TUN connections, better
to not touch them.

Reported-by: Nick Moriarty <nick.moriarty@york.ac.uk>
Tested-by: Nick Moriarty <nick.moriarty@york.ac.uk>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d2d7bdf1d510..ad99c1ceea6f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 {
 	unsigned int verdict = NF_DROP;
 
-	if (IP_VS_FWD_METHOD(cp) != 0) {
-		pr_err("shouldn't reach here, because the box is on the "
-		       "half connection in the tun/dr module.\n");
-	}
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		goto ignore_cp;
 
 	/* Ensure the checksum is correct */
 	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
@@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 		ip_vs_notrack(skb);
 	else
 		ip_vs_update_conntrack(skb, cp, 0);
+
+ignore_cp:
 	verdict = NF_ACCEPT;
 
 out:
@@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 	 */
 	cp = pp->conn_out_get(ipvs, af, skb, &iph);
 
-	if (likely(cp))
+	if (likely(cp)) {
+		if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+			goto ignore_cp;
 		return handle_response(af, skb, pd, cp, &iph, hooknum);
+	}
 
 	/* Check for real-server-started requests */
 	if (atomic_read(&ipvs->conn_out_counter)) {
@@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 			}
 		}
 	}
+
+out:
 	IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
 		      "ip_vs_out: packet continues traversal as normal");
 	return NF_ACCEPT;
+
+ignore_cp:
+	__ip_vs_conn_put(cp);
+	goto out;
 }
 
 /*
-- 
2.1.4


^ permalink raw reply related

* Re: [PATCH net 1/3] vlan: Fix tcp checksums offloads for Q-in-Q vlan.
From: Toshiaki Makita @ 2017-05-19  8:16 UTC (permalink / raw)
  To: vyasevic, Vladislav Yasevich, netdev; +Cc: mkubecek
In-Reply-To: <95de0652-6d73-9885-875e-8c98d02bd419@redhat.com>

On 2017/05/19 16:09, Vlad Yasevich wrote:
> On 05/18/2017 10:13 PM, Toshiaki Makita wrote:
>> On 2017/05/18 22:31, Vladislav Yasevich wrote:
>>> It appears that since commit 8cb65d000, Q-in-Q vlans have been
>>> broken.  The series that commit is part of enabled TSO and checksum
>>> offloading on Q-in-Q vlans.  However, most HW we support can't handle
>>> it.  To work around the issue, the above commit added a function that
>>> turns off offloads on Q-in-Q devices, but it left the checksum offload.
>>> That will cause issues with most older devices that supprort very basic
>>> checksum offload capabilities as well as some newer devices (we've
>>> reproduced te problem with both be2net and bnx).
>>>
>>> To solve this for everyone, turn off checksum offloading feature
>>> by default when sending Q-in-Q traffic.  Devices that are proven to
>>> work can provided a corrected ndo_features_check implemetation.
>>>
>>> Fixes: 8cb65d000 ("net: Move check for multiple vlans to drivers")
>>> CC: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
>>> ---
>>>  include/linux/if_vlan.h | 1 -
>>>  1 file changed, 1 deletion(-)
>>>
>>> diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
>>> index 8d5fcd6..ae537f0 100644
>>> --- a/include/linux/if_vlan.h
>>> +++ b/include/linux/if_vlan.h
>>> @@ -619,7 +619,6 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
>>>  						     NETIF_F_SG |
>>>  						     NETIF_F_HIGHDMA |
>>>  						     NETIF_F_FRAGLIST |
>>> -						     NETIF_F_HW_CSUM |
>>>  						     NETIF_F_HW_VLAN_CTAG_TX |
>>>  						     NETIF_F_HW_VLAN_STAG_TX);
>>>  
>>
>> I guess HW_CSUM theoretically can handle Q-in-Q packets and the problem
>> is IP_CSUM and IPV6_CSUM.
>> So wouldn't it be better to leave HW_CSUM and drop IP_CSUM/IPV6_CSUM,
>> i.e. change intersection into bitwise AND?
>>
> 
> It wasn't really a problem before accelerations got enabled on q-in-q
> vlans.

Right for stacked vlan device.
But I think the check was there for packets from guests forwarded by
bridge to vlan device so it was a problem before 8cb65d000.

>> The intersection was introduced in db115037bb57 ("net: fix checksum
>> features handling in netif_skb_features()"), but I guess for this
>> particular check the intersection was not needed.
>>
> 
> So, to put it another way, leave the intersection with HW_CSUM in the mask,
> and then do:
> 
>   return features & ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
> 
> This might work, but it assumes that everyone who announce HW_CSUM can
> do q-in-q vlans.  It's been a bit of a pain tracking this down and I'd rather
> fix it for everyone and let individual driver authors verify that Q-in-Q works
> correctly with HW checksum.  However, I am willing to do the above if
> that's what people want.

At least HW_CSUM in the check was introduced intentionally.
https://www.spinics.net/lists/netdev/msg152016.html

And I think HW_CSUM should work with any packets.
You know, include/linux/skbuff.h says
>  *	NETIF_F_HW_CSUM	- The driver (or its device) is able to compute one
>  *			  IP (one's complement) checksum for any combination
>  *			  of protocols or protocol layering.

We should be able to safely enable it.

...But you are so worried about layer2 protocol handling of HW_CSUM
devices, I'm ok with disabling it for now.

-- 
Toshiaki Makita

^ permalink raw reply

* Re: [PATCH v2] e1000e: Don't return uninitialized stats
From: Jeff Kirsher @ 2017-05-19  8:16 UTC (permalink / raw)
  To: David Miller, bpoirier
  Cc: s.priebe, intel-wired-lan, netdev, pmenzel, sasha.neftin,
	aaron.f.brown, stephen
In-Reply-To: <20170518.104614.1739169308591707817.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 744 bytes --]

On Thu, 2017-05-18 at 10:46 -0400, David Miller wrote:
> From: Benjamin Poirier <bpoirier@suse.com>
> Date: Wed, 17 May 2017 16:24:13 -0400
> 
> > Some statistics passed to ethtool are garbage because
> > e1000e_get_stats64()
> > doesn't write them, for example: tx_heartbeat_errors. This leaks kernel
> > memory to userspace and confuses users.
> > 
> > Do like ixgbe and use dev_get_stats() which first zeroes out
> > rtnl_link_stats64.
> > 
> > Fixes: 5944701df90d ("net: remove useless memset's in drivers
> > get_stats64")
> > Reported-by: Stefan Priebe <s.priebe@profihost.ag>
> > Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
> 
> Jeff, please be sure to pick this up, thanks.

Yep, I have it in my tree, thanks.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [v2] ath9k: remove unnecessary code
From: Kalle Valo @ 2017-05-19  7:58 UTC (permalink / raw)
  To: Gustavo A. R. Silva
  Cc: linux-wireless, netdev, linux-kernel, Gustavo A. R. Silva,
	Arend Van Spriel
In-Reply-To: <20170509130430.GA22190@embeddedgus>

"Gustavo A. R. Silva" <garsilva@embeddedor.com> wrote:
> The array field eeprom_data in struct th9k_platform_data
> is a fixed size array so it can never be NULL.
> 
> Addresses-Coverity-ID: 1364903
> Cc: Arend Van Spriel <arend.vanspriel@broadcom.com>
> Cc: Kalle Valo <kvalo@qca.qualcomm.com>
> Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>

Patch applied to ath-next branch of ath.git, thanks.

641c1f4ab7f6 ath9k: remove unnecessary code

-- 
https://patchwork.kernel.org/patch/9717873/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: wil6210: use memdup_user
From: Kalle Valo @ 2017-05-19  7:55 UTC (permalink / raw)
  To: Geliang Tang
  Cc: Maya Erez, Kalle Valo, Geliang Tang, linux-wireless, wil6210,
	netdev, linux-kernel
In-Reply-To: <b5b672573585b51a3acb37231f38db45f5873f60.1493781695.git.geliangtang@gmail.com>

Geliang Tang <geliangtang@gmail.com> wrote:
> Use memdup_user() helper instead of open-coding to simplify the code.
> 
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>

Patch applied to ath-next branch of ath.git, thanks.

9a49290919e1 wil6210: use memdup_user

-- 
https://patchwork.kernel.org/patch/9715009/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: ath5k: fix memory leak on buf on failed eeprom read
From: Kalle Valo @ 2017-05-19  7:52 UTC (permalink / raw)
  To: Colin Ian King
  Cc: Jiri Slaby, Nick Kossifidis, Luis R . Rodriguez, Kalle Valo,
	linux-wireless, kernel-janitors, netdev
In-Reply-To: <20170503142600.32609-1-colin.king@canonical.com>

Colin Ian King <colin.king@canonical.com> wrote:
> From: Colin Ian King <colin.king@canonical.com>
> 
> The AR5K_EEPROM_READ macro returns with -EIO if a read error
> occurs causing a memory leak on the allocated buffer buf. Fix
> this by explicitly calling ath5k_hw_nvram_read and exiting on
> the via the freebuf label that performs the necessary free'ing
> of buf when a read error occurs.
> 
> Detected by CoverityScan, CID#1248782 ("Resource Leak")
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Patch applied to ath-next branch of ath.git, thanks.

8fed6823e06e ath5k: fix memory leak on buf on failed eeprom read

-- 
https://patchwork.kernel.org/patch/9709935/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: net: ath: tx99: fixed a spelling issue
From: Kalle Valo @ 2017-05-19  7:49 UTC (permalink / raw)
  To: ammly
  Cc: ath9k-devel, kvalo, linux-wireless, netdev, linux-kernel,
	Ammly Fredrick
In-Reply-To: <1493310697-18610-1-git-send-email-ammly@gmail.com>

ammly <ammlyf@gmail.com> wrote:
> Fixed a spelling issue.
> 
> Signed-off-by: Ammly Fredrick <ammlyf@gmail.com>

Patch applied to ath-next branch of ath.git, thanks.

c46e2a848f29 ath9k: fix spelling in ath9k_tx99_init()

-- 
https://patchwork.kernel.org/patch/9703211/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* [PATCH] e1000e: use disable_hardirq() also for MSIX vectors in e1000_netpoll()
From: Konstantin Khlebnikov @ 2017-05-19  7:18 UTC (permalink / raw)
  To: netdev, intel-wired-lan, Jeff Kirsher
  Cc: Dave Jones, WANG Cong, David S. Miller, Sabrina Dubroca

Replace disable_irq() which waits for threaded irq handlers with
disable_hardirq() which waits only for hardirq part.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: 311191297125 ("e1000: use disable_hardirq() for e1000_netpoll()")
---
 drivers/net/ethernet/intel/e1000e/netdev.c |   12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index b3679728caac..7f185f481b12 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6733,20 +6733,20 @@ static irqreturn_t e1000_intr_msix(int __always_unused irq, void *data)
 
 		vector = 0;
 		msix_irq = adapter->msix_entries[vector].vector;
-		disable_irq(msix_irq);
-		e1000_intr_msix_rx(msix_irq, netdev);
+		if (disable_hardirq(msix_irq))
+			e1000_intr_msix_rx(msix_irq, netdev);
 		enable_irq(msix_irq);
 
 		vector++;
 		msix_irq = adapter->msix_entries[vector].vector;
-		disable_irq(msix_irq);
-		e1000_intr_msix_tx(msix_irq, netdev);
+		if (disable_hardirq(msix_irq))
+			e1000_intr_msix_tx(msix_irq, netdev);
 		enable_irq(msix_irq);
 
 		vector++;
 		msix_irq = adapter->msix_entries[vector].vector;
-		disable_irq(msix_irq);
-		e1000_msix_other(msix_irq, netdev);
+		if (disable_hardirq(msix_irq))
+			e1000_msix_other(msix_irq, netdev);
 		enable_irq(msix_irq);
 	}
 

^ permalink raw reply related

* Re: [PATCH v2 00/10] rt2x00: rt2x00: improve calling conventions for register accessors
From: Arnd Bergmann @ 2017-05-19  7:21 UTC (permalink / raw)
  To: Kalle Valo
  Cc: linux-wireless, Stanislaw Gruszka, David Miller, Helmut Schaa,
	Daniel Golle, Mathias Kresin, Johannes Berg, Serge Vasilugin,
	Roman Yeryomin, Networking, Linux Kernel Mailing List,
	Jes Sorensen, Tom Psyborg
In-Reply-To: <87efvlwc48.fsf@kamboji.qca.qualcomm.com>

On Fri, May 19, 2017 at 9:15 AM, Kalle Valo <kvalo@codeaurora.org> wrote:
> Arnd Bergmann <arnd@arndb.de> writes:
>
>> On Fri, May 19, 2017 at 7:18 AM, Kalle Valo <kvalo@codeaurora.org> wrote:
>>> Arnd Bergmann <arnd@arndb.de> writes:
>>>
>>>> I've managed to split up my long patch into a series of reasonble
>>>> steps now.
>>>>
>>>> The first two are required to fix a regression from commit 41977e86c984
>>>> ("rt2x00: add support for MT7620"), the rest are just cleanups to
>>>> have a consistent state across all the register access functions.
>>>
>>> Can these all go to 4.13 or would you prefer me to push the first two
>>> 4.12? Or what?
>>
>> I think you can reasonably argue either way: the second patch does
>> fix a real bug that may or may not lead to an exploitable stack overflow
>> when CONFIG_KASAN is enabled, which would be a reason to put it
>> into 4.12. On the other hand, I have another 20 patches for similar
>> (or worse) stack overflow issues with KASAN that I'm hoping to all
>> get into 4.13 and backported into stable kernel later if necessary,
>> so we could treat this one like the others.
>>
>> The only difference between this and the others is that in rt2x00 it
>> is a regression against 4.11, while the others have all been present
>> for a long time.
>
> Having all of these in 4.12 sounds a bit excessive and splitting the set
> (the first two into 4.12 and the rest into 4.13) sounds too much work.
> So I would prefer to queue these to 4.13, if it's ok for everyone?

Ok, sounds fine. Thanks,

      Arnd

^ permalink raw reply

* Re: [PATCH v2 00/10] rt2x00: rt2x00: improve calling conventions for register accessors
From: Kalle Valo @ 2017-05-19  7:15 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linux-wireless, Stanislaw Gruszka, David Miller, Helmut Schaa,
	Daniel Golle, Mathias Kresin, Johannes Berg, Serge Vasilugin,
	Roman Yeryomin, Networking, Linux Kernel Mailing List,
	Jes Sorensen, Tom Psyborg
In-Reply-To: <CAK8P3a3YGxNBQq3RYucMCCvSyXOD8skF_Db-eba0PcBL4_P8Dg@mail.gmail.com>

Arnd Bergmann <arnd@arndb.de> writes:

> On Fri, May 19, 2017 at 7:18 AM, Kalle Valo <kvalo@codeaurora.org> wrote:
>> Arnd Bergmann <arnd@arndb.de> writes:
>>
>>> I've managed to split up my long patch into a series of reasonble
>>> steps now.
>>>
>>> The first two are required to fix a regression from commit 41977e86c984
>>> ("rt2x00: add support for MT7620"), the rest are just cleanups to
>>> have a consistent state across all the register access functions.
>>
>> Can these all go to 4.13 or would you prefer me to push the first two
>> 4.12? Or what?
>
> I think you can reasonably argue either way: the second patch does
> fix a real bug that may or may not lead to an exploitable stack overflow
> when CONFIG_KASAN is enabled, which would be a reason to put it
> into 4.12. On the other hand, I have another 20 patches for similar
> (or worse) stack overflow issues with KASAN that I'm hoping to all
> get into 4.13 and backported into stable kernel later if necessary,
> so we could treat this one like the others.
>
> The only difference between this and the others is that in rt2x00 it
> is a regression against 4.11, while the others have all been present
> for a long time.

Having all of these in 4.12 sounds a bit excessive and splitting the set
(the first two into 4.12 and the rest into 4.13) sounds too much work.
So I would prefer to queue these to 4.13, if it's ok for everyone?

-- 
Kalle Valo

^ permalink raw reply

* Re: [PATCH v5 15/17] dt-bindings: qca7000: append UART interface to binding
From: Stefan Wahren @ 2017-05-19  7:13 UTC (permalink / raw)
  To: Rob Herring
  Cc: Jakub Kicinski, Michael Heimpold, Mark Rutland,
	Greg Kroah-Hartman, Jiri Slaby,
	linux-serial-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, devicetree-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20170511234342.575d9226-68UzVGuGftmUSpRRplVxJ1aTQe2KTcn/@public.gmane.org>

Hi Rob,

Am 12.05.2017 um 08:43 schrieb Jakub Kicinski:
> On Fri, 12 May 2017 06:15:52 +0000, Michael Heimpold wrote:
>> Hi,
>>
>> Zitat von Jakub Kicinski <kubakici-5tc4TXWwyLM@public.gmane.org>:
>>
>>> On Thu, 11 May 2017 21:12:22 +0200, Michael Heimpold wrote:  
>>>> Am Mittwoch, 10. Mai 2017, 10:53:26 CEST schrieb Stefan Wahren:  
>>>>> This merges the serdev binding for the QCA7000 UART driver (Ethernet over
>>>>> UART) into the existing document.
>>>>>
>>>>> Signed-off-by: Stefan Wahren <stefan.wahren-eS4NqCHxEME@public.gmane.org>
>>>>> ---
>>>>>  .../devicetree/bindings/net/qca-qca7000.txt        | 32
>>>>> ++++++++++++++++++++++ 1 file changed, 32 insertions(+)
>>>>>
>>>>> diff --git a/Documentation/devicetree/bindings/net/qca-qca7000.txt
>>>>> b/Documentation/devicetree/bindings/net/qca-qca7000.txt index
>>>>> a37f656..08364c3 100644
>>>>> --- a/Documentation/devicetree/bindings/net/qca-qca7000.txt
>>>>> +++ b/Documentation/devicetree/bindings/net/qca-qca7000.txt
>>>>> @@ -54,3 +54,35 @@ ssp2: spi@80014000 {
>>>>>  		local-mac-address = [ A0 B0 C0 D0 E0 F0 ];
>>>>>  	};
>>>>>  };
>>>>> +
>>>>> +(b) Ethernet over UART
>>>>> +
>>>>> +In order to use the QCA7000 as UART slave it must be defined as    
>>>> a child of  
>>>>> a +UART master in the device tree. It is possible to preconfigure the UART
>>>>> +settings of the QCA7000 firmware, but it's not possible to change them
>>>>> during +runtime.
>>>>> +
>>>>> +Required properties:
>>>>> +- compatible        : Should be "qca,qca7000-uart"  
>>>> I already discussed this with Stefan off-list a little bit, but I would like
>>>> to bring this to a broader audience: I'm not sure whether the compatible
>>>> should contain the "-uart" suffix, because the hardware chip is the  
>>>> very same
>>>> QCA7000 chip which can also be used with SPI protocol.
>>>> The only difference is the loaded firmware within the chip which can either
>>>> speak SPI or UART protocol (but not both at the same time - due to shared
>>>> pins). So the hardware design decides which interface type is used.
>>>>
>>>> At the moment, this patch series adds a dedicated driver for the UART
>>>> protocol, in parallel to the existing SPI driver. So a different compatible
>>>> string is needed here to match against the new driver.
>>>>
>>>> An alternative approach would be to re-use the existing compatible string
>>>> "qca,qca7000" for both, the SPI and UART protocol, because a "smarter"
>>>> (combined) driver would detect which protocol to use. For example the driver
>>>> could check for spi-cpha and/or spi-cpol which are required for SPI  
>>>> protocol:
>>>> if these exists the driver could assume that SPI must be used, if both are
>>>> missing then UART protocol should be used.
>>>> (This way it would not be necessary to check whether the node is a child of
>>>> a SPI or UART master node - but maybe this is even easier - I don't know)
>>>>
>>>> Or in shorter words: my concern is that while "qca7000-uart" describes the
>>>> hardware, it's too closely coupled to the driver implementation. Having
>>>> some feedback of the experts would be nice :-)  
>>> I'm no expert, but devices which can do both I2C and SPI are quite
>>> common, and they usually have the same compatible string for both
>>> buses.  
>> do you have an example driver at hand? I only found GPIO mcp23s08 driver,
>> which can handle both I2C and SPI chips, but there are different compatible
>> strings used to distinguish several chip models.
> I think drivers/tty/serial/sc16is7xx.c has the same strings, and some
> Kconfig magic to work when either bus is enabled in .config.
>
> Quick grep shows there are couple more potential ones to look at:
>
> $ find . -name Kconfig | xargs grep -n 'SPI_MASTER.* I2C' 
> ./drivers/tty/serial/Kconfig:1208:        depends on (SPI_MASTER && !I2C) || I2C
> ./drivers/mfd/Kconfig:327:	depends on (SPI_MASTER || I2C)
> ./drivers/iio/dac/Kconfig:10:	depends on (SPI_MASTER && I2C!=m) || I2C
> ./drivers/iio/dac/Kconfig:34:	depends on (SPI_MASTER && I2C!=m) || I2C
> ./drivers/iio/dac/Kconfig:57:	depends on (SPI_MASTER && I2C!=m) || I2C
> ./drivers/gpio/Kconfig:1231:	depends on (SPI_MASTER && !I2C) || I2C
> $ find . -name Kconfig | xargs grep -n 'I2C.*||.*SPI_MASTER' 
> ./drivers/mfd/Kconfig:1094:	depends on (I2C=y || SPI_MASTER=y)
> ./drivers/iio/gyro/Kconfig:55:	depends on (I2C || SPI_MASTER)
> ./drivers/iio/gyro/Kconfig:107:	depends on (I2C || SPI_MASTER) && SYSFS
> ./drivers/iio/accel/Kconfig:153:	depends on (I2C || SPI_MASTER) && SYSFS
> ./drivers/iio/pressure/Kconfig:20:	depends on (I2C || SPI_MASTER)
> ./drivers/iio/pressure/Kconfig:161:	depends on (I2C || SPI_MASTER) && SYSFS
> ./drivers/iio/magnetometer/Kconfig:118:	depends on (I2C || SPI_MASTER) && SYSFS
>
> drivers/mfd/mc13xxx-*.c seems to have the same strings.  The iio/dac drivers
> don't support DT but do share names.  The MCP GPIO chip you mention indeed has
> different product names based on the bus it's made for (0 vs s in the middle 
> of the name), so I gather less relevant case?  drivers/iio/pressure/bmp280-*.c 
> has the same strings, if I'm looking correctly... I didn't look at the others.

are you okay with the suggestion to use the compatible "qca,qca7000" for
both drivers?

Should we mark "qca,qca7000-spi" as deprecated?

Regards
Stefan
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 00/10] rt2x00: rt2x00: improve calling conventions for register accessors
From: Kalle Valo @ 2017-05-19  7:12 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Tom Psyborg, linux-wireless, Stanislaw Gruszka, David Miller,
	Helmut Schaa, Daniel Golle, Mathias Kresin, Johannes Berg,
	Serge Vasilugin, Roman Yeryomin, Networking,
	Linux Kernel Mailing List, Jes Sorensen
In-Reply-To: <CAK8P3a3Y7rCvmJo062-5G07HGiT6x2MtvVV=dQAVBK0AiX0U_A@mail.gmail.com>

Arnd Bergmann <arnd@arndb.de> writes:

> On Fri, May 19, 2017 at 8:44 AM, Tom Psyborg <pozega.tomislav@gmail.com> wrote:
>> warning: 'rt2800_bbp_read' used but never defined
>>  static u8 rt2800_bbp_read(struct rt2x00_dev *rt2x00dev,
>>            ^
>> /home/ubuntu/Music/openwrt/build_dir/target-mipsel_24kc_musl-1.1.16/linux-ramips_mt7620/compat-wireless-2016-05-12/drivers/net/wireless/ralink/rt2x00/rt2800lib.h:262:13:
>> warning: 'rt2800_bbp_write' used but never defined
>>  static void rt2800_bbp_write(struct rt2x00_dev *rt2x00dev,
>>              ^
>>   CC [M]
>> /home/ubuntu/Music/openwrt/build_dir/target-mipsel_24kc_musl-1.1.16/linux-ramips_mt7620/compat-wireless-2016-05-12/drivers/net/wireless/ralink/rt2x00/rt2800pci.o
>> In file included from
>> /home/ubuntu/Music/openwrt/build_dir/target-mipsel_24kc_musl-1.1.16/linux-ramips_mt7620/compat-wireless-2016-05-12/drivers/net/wireless/ralink/rt2x00/rt2800pci.c:43:0:
>> /home/ubuntu/Music/openwrt/build_dir/target-mipsel_24kc_musl-1.1.16/linux-ramips_mt7620/compat-wireless-2016-05-12/drivers/net/wireless/ralink/rt2x00/rt2800lib.h:259:11:
>> warning: 'rt2800_bbp_read' declared 'static' but never defined
>> [-Wunused-function]
>>  static u8 rt2800_bbp_read(struct rt2x00_dev *rt2x00dev,
>>            ^
>
> On which base version did you apply my patches? There may be a conflict
> against patches that are in your tree but not yet in linux-next, as I don't see
> the warning and also see no reference to rt2800_bbp_read in rt2800lib.h

I did a test build with current wireless-drivers-next and I also don't
see any warnings.

-- 
Kalle Valo

^ permalink raw reply

* Re: [PATCH net 1/3] vlan: Fix tcp checksums offloads for Q-in-Q vlan.
From: Vlad Yasevich @ 2017-05-19  7:09 UTC (permalink / raw)
  To: Toshiaki Makita, Vladislav Yasevich, netdev; +Cc: mkubecek
In-Reply-To: <7593b180-9355-2c14-6cd8-b1e4f47a0ae6@lab.ntt.co.jp>

On 05/18/2017 10:13 PM, Toshiaki Makita wrote:
> On 2017/05/18 22:31, Vladislav Yasevich wrote:
>> It appears that since commit 8cb65d000, Q-in-Q vlans have been
>> broken.  The series that commit is part of enabled TSO and checksum
>> offloading on Q-in-Q vlans.  However, most HW we support can't handle
>> it.  To work around the issue, the above commit added a function that
>> turns off offloads on Q-in-Q devices, but it left the checksum offload.
>> That will cause issues with most older devices that supprort very basic
>> checksum offload capabilities as well as some newer devices (we've
>> reproduced te problem with both be2net and bnx).
>>
>> To solve this for everyone, turn off checksum offloading feature
>> by default when sending Q-in-Q traffic.  Devices that are proven to
>> work can provided a corrected ndo_features_check implemetation.
>>
>> Fixes: 8cb65d000 ("net: Move check for multiple vlans to drivers")
>> CC: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
>> ---
>>  include/linux/if_vlan.h | 1 -
>>  1 file changed, 1 deletion(-)
>>
>> diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
>> index 8d5fcd6..ae537f0 100644
>> --- a/include/linux/if_vlan.h
>> +++ b/include/linux/if_vlan.h
>> @@ -619,7 +619,6 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
>>  						     NETIF_F_SG |
>>  						     NETIF_F_HIGHDMA |
>>  						     NETIF_F_FRAGLIST |
>> -						     NETIF_F_HW_CSUM |
>>  						     NETIF_F_HW_VLAN_CTAG_TX |
>>  						     NETIF_F_HW_VLAN_STAG_TX);
>>  
> 
> I guess HW_CSUM theoretically can handle Q-in-Q packets and the problem
> is IP_CSUM and IPV6_CSUM.
> So wouldn't it be better to leave HW_CSUM and drop IP_CSUM/IPV6_CSUM,
> i.e. change intersection into bitwise AND?
> 

It wasn't really a problem before accelerations got enabled on q-in-q
vlans.

> The intersection was introduced in db115037bb57 ("net: fix checksum
> features handling in netif_skb_features()"), but I guess for this
> particular check the intersection was not needed.
> 

So, to put it another way, leave the intersection with HW_CSUM in the mask,
and then do:

  return features & ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);

This might work, but it assumes that everyone who announce HW_CSUM can
do q-in-q vlans.  It's been a bit of a pain tracking this down and I'd rather
fix it for everyone and let individual driver authors verify that Q-in-Q works
correctly with HW checksum.  However, I am willing to do the above if
that's what people want.

-vlad

^ permalink raw reply

* [PATCH 2/4] net-next: stmmac: Remove unnecessary parenthesis
From: Corentin Labbe @ 2017-05-19  7:03 UTC (permalink / raw)
  To: peppe.cavallaro, alexandre.torgue; +Cc: netdev, linux-kernel, Corentin Labbe
In-Reply-To: <20170519070335.1604-1-clabbe.montjoie@gmail.com>

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 2b778f63d1d5..e008cded388e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -799,7 +799,7 @@ static void stmmac_adjust_link(struct net_device *dev)
 		 * If not, we operate in half-duplex mode. */
 		if (phydev->duplex != priv->oldduplex) {
 			new_state = 1;
-			if (!(phydev->duplex))
+			if (!phydev->duplex)
 				ctrl &= ~priv->hw->link.duplex;
 			else
 				ctrl |= priv->hw->link.duplex;
-- 
2.13.0

^ permalink raw reply related

* [PATCH 3/4] net-next: stmmac: use SPEED_xxx instead of raw value
From: Corentin Labbe @ 2017-05-19  7:03 UTC (permalink / raw)
  To: peppe.cavallaro, alexandre.torgue; +Cc: netdev, linux-kernel, Corentin Labbe
In-Reply-To: <20170519070335.1604-1-clabbe.montjoie@gmail.com>

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index e008cded388e..a1ab52e29359 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -812,12 +812,12 @@ static void stmmac_adjust_link(struct net_device *dev)
 		if (phydev->speed != priv->speed) {
 			new_state = 1;
 			switch (phydev->speed) {
-			case 1000:
+			case SPEED_1000:
 				if (priv->plat->has_gmac ||
 				    priv->plat->has_gmac4)
 					ctrl &= ~priv->hw->link.port;
 				break;
-			case 100:
+			case SPEED_100:
 				if (priv->plat->has_gmac ||
 				    priv->plat->has_gmac4) {
 					ctrl |= priv->hw->link.port;
@@ -826,7 +826,7 @@ static void stmmac_adjust_link(struct net_device *dev)
 					ctrl &= ~priv->hw->link.port;
 				}
 				break;
-			case 10:
+			case SPEED_10:
 				if (priv->plat->has_gmac ||
 				    priv->plat->has_gmac4) {
 					ctrl |= priv->hw->link.port;
-- 
2.13.0

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox