Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 3/4] netfilter: ipv6: add getsockopt to retrieve origdst
From: pablo @ 2012-11-16 12:40 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1353069653-3231-1-git-send-email-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

userspace can query the original ipv4 destination address of a REDIRECTed
connection via
getsockopt(m_sock, SOL_IP, SO_ORIGINAL_DST, &m_server_addr, &addrsize)

but for ipv6 no such option existed.

This adds getsockopt(..., IPPROTO_IPV6, IP6T_SO_ORIGINAL_DST, ...).

Without this, userspace needs to parse /proc or use ctnetlink, which
appears to be overkill.

This uses option number 80 for IP6T_SO_ORIGINAL_DST, which is spare,
to use the same number we use in the IPv4 socket option SO_ORIGINAL_DST.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/in6.h                       |    1 +
 include/uapi/linux/netfilter_ipv6/ip6_tables.h |    3 ++
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |   61 ++++++++++++++++++++++++
 3 files changed, 65 insertions(+)

diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 1e31599..f79c372 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -240,6 +240,7 @@ struct in6_flowlabel_req {
  *
  * IP6T_SO_GET_REVISION_MATCH	68
  * IP6T_SO_GET_REVISION_TARGET	69
+ * IP6T_SO_ORIGINAL_DST		80
  */
 
 /* RFC5014: Source address selection */
diff --git a/include/uapi/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h
index bf1ef65..649c680 100644
--- a/include/uapi/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h
@@ -178,6 +178,9 @@ struct ip6t_error {
 #define IP6T_SO_GET_REVISION_TARGET	(IP6T_BASE_CTL + 5)
 #define IP6T_SO_GET_MAX			IP6T_SO_GET_REVISION_TARGET
 
+/* obtain original address if REDIRECT'd connection */
+#define IP6T_SO_ORIGINAL_DST            80
+
 /* ICMP matching stuff */
 struct ip6t_icmp {
 	__u8 type;				/* type to match */
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 8860d23..02dcafd 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -21,6 +21,7 @@
 
 #include <linux/netfilter_bridge.h>
 #include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
@@ -295,6 +296,50 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
 	},
 };
 
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+	const struct nf_conntrack_tuple_hash *h;
+	struct sockaddr_in6 sin6;
+	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+	struct nf_conn *ct;
+
+	tuple.src.u3.in6 = inet6->rcv_saddr;
+	tuple.src.u.tcp.port = inet->inet_sport;
+	tuple.dst.u3.in6 = inet6->daddr;
+	tuple.dst.u.tcp.port = inet->inet_dport;
+	tuple.dst.protonum = sk->sk_protocol;
+
+	if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP)
+		return -ENOPROTOOPT;
+
+	if (*len < 0 || (unsigned int) *len < sizeof(sin6))
+		return -EINVAL;
+
+	h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+	if (!h) {
+		pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+			 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+			 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+		return -ENOENT;
+	}
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	sin6.sin6_family = AF_INET6;
+	sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+	sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK;
+	memcpy(&sin6.sin6_addr,
+		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+					sizeof(sin6.sin6_addr));
+	sin6.sin6_scope_id = sk->sk_bound_dev_if;
+
+	nf_ct_put(ct);
+	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 
 #include <linux/netfilter/nfnetlink.h>
@@ -359,6 +404,14 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
 
+static struct nf_sockopt_ops so_getorigdst6 = {
+	.pf		= NFPROTO_IPV6,
+	.get_optmin	= IP6T_SO_ORIGINAL_DST,
+	.get_optmax	= IP6T_SO_ORIGINAL_DST + 1,
+	.get		= ipv6_getorigdst,
+	.owner		= THIS_MODULE,
+};
+
 static int ipv6_net_init(struct net *net)
 {
 	int ret = 0;
@@ -425,6 +478,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 	need_conntrack();
 	nf_defrag_ipv6_enable();
 
+	ret = nf_register_sockopt(&so_getorigdst6);
+	if (ret < 0) {
+		pr_err("Unable to register netfilter socket option\n");
+		return ret;
+	}
+
 	ret = register_pernet_subsys(&ipv6_net_ops);
 	if (ret < 0)
 		goto cleanup_pernet;
@@ -440,6 +499,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
  cleanup_ipv6:
 	unregister_pernet_subsys(&ipv6_net_ops);
  cleanup_pernet:
+	nf_unregister_sockopt(&so_getorigdst6);
 	return ret;
 }
 
@@ -448,6 +508,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
 	synchronize_net();
 	nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
 	unregister_pernet_subsys(&ipv6_net_ops);
+	nf_unregister_sockopt(&so_getorigdst6);
 }
 
 module_init(nf_conntrack_l3proto_ipv6_init);
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 2/4] netfilter: nf_nat: use PTR_RET
From: pablo @ 2012-11-16 12:40 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1353069653-3231-1-git-send-email-pablo@netfilter.org>

From: Wu Fengguang <fengguang.wu@intel.com>

Use PTR_RET rather than if(IS_ERR(...)) + PTR_ERR

Generated by: coccinelle/api/ptr_ret.cocci

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/iptable_nat.c  |    4 +---
 net/ipv6/netfilter/ip6table_nat.c |    4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 9e0ffaf..8d65b74 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -274,9 +274,7 @@ static int __net_init iptable_nat_net_init(struct net *net)
 		return -ENOMEM;
 	net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
 	kfree(repl);
-	if (IS_ERR(net->ipv4.nat_table))
-		return PTR_ERR(net->ipv4.nat_table);
-	return 0;
+	return PTR_RET(net->ipv4.nat_table);
 }
 
 static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index e418bd6..4c8219e 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -275,9 +275,7 @@ static int __net_init ip6table_nat_net_init(struct net *net)
 		return -ENOMEM;
 	net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
 	kfree(repl);
-	if (IS_ERR(net->ipv6.ip6table_nat))
-		return PTR_ERR(net->ipv6.ip6table_nat);
-	return 0;
+	return PTR_RET(net->ipv6.ip6table_nat);
 }
 
 static void __net_exit ip6table_nat_net_exit(struct net *net)
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 1/4] ipvs: remove silly double assignment
From: pablo @ 2012-11-16 12:40 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1353069653-3231-1-git-send-email-pablo@netfilter.org>

From: Alan Cox <alan@linux.intel.com>

I don't even want to think what the C spec says for this 8)

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_nfct.c |    2 +-
 net/netfilter/ipvs/ip_vs_xmit.c |    8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 022e77e..c8beafd 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -82,7 +82,7 @@ void
 ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
 {
 	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	struct nf_conntrack_tuple new_tuple;
 
 	if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 12008b4..ee6b7a9 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -594,7 +594,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
 		enum ip_conntrack_info ctinfo;
-		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
 		if (ct && !nf_ct_is_untracked(ct)) {
 			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
@@ -710,7 +710,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
 		enum ip_conntrack_info ctinfo;
-		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
 		if (ct && !nf_ct_is_untracked(ct)) {
 			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
@@ -1235,7 +1235,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
 		enum ip_conntrack_info ctinfo;
-		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
 		if (ct && !nf_ct_is_untracked(ct)) {
 			IP_VS_DBG(10, "%s(): "
@@ -1356,7 +1356,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
 		enum ip_conntrack_info ctinfo;
-		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
 		if (ct && !nf_ct_is_untracked(ct)) {
 			IP_VS_DBG(10, "%s(): "
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 4/4] netfilter: ipv6: only provide sk_bound_dev_if for link-local addr
From: pablo @ 2012-11-16 12:40 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1353069653-3231-1-git-send-email-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

yoshfuji points out that sk_bound_dev_if should only be provided
for link-local addresses.

IPv6 getpeer/sockname also has this test, i.e. we will now
only set sin6_scope_id if the original(!) destination
was a link-local address.

Reported-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 02dcafd..e5f6cf7 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -334,9 +334,14 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	memcpy(&sin6.sin6_addr,
 		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
 					sizeof(sin6.sin6_addr));
-	sin6.sin6_scope_id = sk->sk_bound_dev_if;
 
 	nf_ct_put(ct);
+
+	if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+		sin6.sin6_scope_id = sk->sk_bound_dev_if;
+	else
+		sin6.sin6_scope_id = 0;
+
 	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
 }
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 0/4] netfilter updates for nf-next (try 2)
From: pablo @ 2012-11-16 12:40 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev

From: Pablo Neira Ayuso <pablo@netfilter.org>

Hi David,

This is the second try to include the following four patches that contain
updates for your net-next tree, they are:

* Little cleanup for IPVS to remove the use of a strange notation to assign
  the conntrack object, from Alan Cox.

* Another little cleanup for nf_nat to save a couple of lines by using
  PTR_RET, from Wu Fengguang.

* getsockopt support to obtain the original IPv6 address after NAT,
  similar to the one that IPv4 provides, from Florian Westphal.

* Follow-up patch, suggested by YOSHIFUJI Hideaki, to only provide the
  scope_id if the original destination address is link-local, again from
  Florian Westphal.

You can pull these changes from:

git://1984.lsi.us.es/nf-next master

Thanks!

Alan Cox (1):
  ipvs: remove silly double assignment

Florian Westphal (2):
  netfilter: ipv6: add getsockopt to retrieve origdst
  netfilter: ipv6: only provide sk_bound_dev_if for link-local addr

Wu Fengguang (1):
  netfilter: nf_nat: use PTR_RET

 include/uapi/linux/in6.h                       |    1 +
 include/uapi/linux/netfilter_ipv6/ip6_tables.h |    3 ++
 net/ipv4/netfilter/iptable_nat.c               |    4 +-
 net/ipv6/netfilter/ip6table_nat.c              |    4 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |   66 ++++++++++++++++++++++++
 net/netfilter/ipvs/ip_vs_nfct.c                |    2 +-
 net/netfilter/ipvs/ip_vs_xmit.c                |    8 +--
 7 files changed, 77 insertions(+), 11 deletions(-)

-- 
1.7.10.4

^ permalink raw reply

* Re: [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: Ian Campbell @ 2012-11-16 11:46 UTC (permalink / raw)
  To: ANNIE LI
  Cc: xen-devel@lists.xensource.com, netdev@vger.kernel.org,
	konrad.wilk@oracle.com
In-Reply-To: <50A6255C.10108@oracle.com>

On Fri, 2012-11-16 at 11:37 +0000, ANNIE LI wrote:
> 
> On 2012-11-16 17:57, Ian Campbell wrote:
> > On Thu, 2012-11-15 at 07:03 +0000, Annie Li wrote:
> >> This patch implements persistent grants for xen-netfront/netback.
> > Hang on a sec. It has just occurred to me that netfront/netback in the
> > current mainline kernels don't currently use grant maps at all, they use
> > grant copy on both the tx and rx paths.
> 
> Ah, this patch is based on v3.4-rc3.

Nothing has changed in more recent kernels in this regard.

> >
> > The supposed benefit of persistent grants is to avoid the TLB shootdowns
> > on grant unmap, but in the current code there should be exactly zero of
> > those.
> 
> Is there any performance document about current grant copy code in 
> mainline kernel?

Not AFAIK.

> > If I understand correctly this patch goes from using grant copy
> > operations to persistently mapping frames and then using memcpy on those
> > buffers to copy in/out to local buffers. I'm finding it hard to think of
> > a reason why this should perform any better, do you have a theory which
> > explains it?
> 
> This patch is aiming to fix spin lock issue of grant operations, it 
> comes out to avoid possible grant operations(including grant map and copy).

Makes sense. This is the sort of thing which ought to feature
prominently in commit messages and/or introductory mails.

> > Do you know
> > that they both benefit from this change (rather than for example an
> > improvement in one direction masking a regression in the other).
> 
> On theory, this implementation avoid spinlock issue of grant operation, 
> so they should both benefit from it.

It seems like having netfront simply allocate itself a pool of grant
references which it reuses would give equivalent benefits whilst being a
smaller patch, with no protocol change and avoiding double copying. In
fact by avoiding the double copy I'd expect it to be even better.

> > Were
> > the numbers you previously posted in one particular direction or did you
> > measure both?
> 
> One particular direction, one runs as server, the other runs as client.

I think you need to measure both dom0->domU and domU->dom0 to get the
full picture since AIUI netperf sends the bulk data in only one
direction with just ACKs coming back the other way.

Ian.

^ permalink raw reply

* Re: [PATCH 8/8] drivers/net/wireless/ath/ath6kl/hif.c: drop if around WARN_ON
From: Kalle Valo @ 2012-11-16 11:39 UTC (permalink / raw)
  To: Julia Lawall
  Cc: kernel-janitors-u79uwXL29TY76Z2rM5mHXA, John W. Linville,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1351974625-10282-9-git-send-email-Julia.Lawall-L2FTfq7BK8M@public.gmane.org>

On 11/03/2012 10:30 PM, Julia Lawall wrote:
> From: Julia Lawall <Julia.Lawall-L2FTfq7BK8M@public.gmane.org>
> 
> Just use WARN_ON rather than an if containing only WARN_ON(1).
> 
> A simplified version of the semantic patch that makes this transformation
> is as follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> expression e;
> @@
> - if (e) WARN_ON(1);
> + WARN_ON(e);
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall-L2FTfq7BK8M@public.gmane.org>

Thanks, applied to ath6kl.git.

Kalle
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: ANNIE LI @ 2012-11-16 11:37 UTC (permalink / raw)
  To: Ian Campbell
  Cc: xen-devel@lists.xensource.com, netdev@vger.kernel.org,
	konrad.wilk@oracle.com
In-Reply-To: <1353059821.3499.190.camel@zakaz.uk.xensource.com>



On 2012-11-16 17:57, Ian Campbell wrote:
> On Thu, 2012-11-15 at 07:03 +0000, Annie Li wrote:
>> This patch implements persistent grants for xen-netfront/netback.
> Hang on a sec. It has just occurred to me that netfront/netback in the
> current mainline kernels don't currently use grant maps at all, they use
> grant copy on both the tx and rx paths.

Ah, this patch is based on v3.4-rc3.
Current mainline kernel does not pass the netperf/netserver case. As I 
mentioned earlier, I hit BUG_ON with your debug patch too when testing 
mainline kernel with netperf/netserver.
This is interesting; I should have checked the latest code.

>
> The supposed benefit of persistent grants is to avoid the TLB shootdowns
> on grant unmap, but in the current code there should be exactly zero of
> those.

Is there any performance document about current grant copy code in 
mainline kernel?

>
> If I understand correctly this patch goes from using grant copy
> operations to persistently mapping frames and then using memcpy on those
> buffers to copy in/out to local buffers. I'm finding it hard to think of
> a reason why this should perform any better, do you have a theory which
> explains it?

This patch is aiming to fix spin lock issue of grant operations, it 
comes out to avoid possible grant operations(including grant map and copy).

> (my best theory is that it has a beneficial impact on where
> the cache locality of the data, but netperf doesn't typically actually
> access the data so I'm not sure why that would matter)
>
> Also AIUI this is also doing persistent grants for both Tx and Rx
> directions?

Yes.

>
> For guest Rx does this mean it now copies twice, in dom0 from the DMA
> buffer to the guest provided buffer and then again in the guest from the
> granted buffer to a normal one?

Yes.

>
> For guest Tx how do you handle the lifecycle of the grant mapped pages
> which are being sent up into the dom0 network stack? Or are you also now
> copying twice in this case? (i.e. guest copies into a granted buffer and
> dom0 copies out into a local buffer?)

Copy twice: guest copies into a granted buffer and dom0 copies out into 
a local buffer.

>
> Did you do measurement of the Tx and Rx cases independently?

No.

> Do you know
> that they both benefit from this change (rather than for example an
> improvement in one direction masking a regression in the other).

On theory, this implementation avoid spinlock issue of grant operation, 
so they should both benefit from it.

> Were
> the numbers you previously posted in one particular direction or did you
> measure both?

One particular direction, one runs as server, the other runs as client.

Thanks
Annie
>
> Ian.
>

^ permalink raw reply

* Re: [Xen-devel] [PATCH 1/4] xen/netback: implements persistent grant with one page pool.
From: ANNIE LI @ 2012-11-16 11:34 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Roger Pau Monne, xen-devel@lists.xensource.com,
	netdev@vger.kernel.org, konrad.wilk@oracle.com
In-Reply-To: <1353058363.3499.171.camel@zakaz.uk.xensource.com>



On 2012-11-16 17:32, Ian Campbell wrote:
> On Fri, 2012-11-16 at 02:49 +0000, ANNIE LI wrote:
>>> Take a look at the following functions from blkback; foreach_grant,
>>> add_persistent_gnt and get_persistent_gnt. They are generic functions to
>>> deal with persistent grants.
>> Ok, thanks.
>> Or moving those functions into a separate common file?
> Please put them somewhere common.

Ok.

>>> This is highly inefficient, one of the points of using gnttab_set_map_op
>>> is that you can queue a bunch of grants, and then map them at the same
>>> time using gnttab_map_refs, but here you are using it to map a single
>>> grant at a time. You should instead see how much grants you need to map
>>> to complete the request and map them all at the same time.
>> Yes, it is inefficient here. But this is limited by current netback
>> implementation. Current netback is not per-VIF based(not like blkback
>> does). After combining persistent grant and non persistent grant
>> together, every vif request in the queue may/may not support persistent
>> grant. I have to judge whether every vif in the queue supports
>> persistent grant or not. If it support, memcpy is used, if not,
>> grantcopy is used.
> You could (and should) still batch all the grant copies into one
> hypercall, e.g. walk the list either doing memcpy or queuing up copyops
> as appropriate, then at the end if the queue is non-zero length issue
> the hypercall.

This still connects with netback per-VIF implementation.

> I'd expect this lack of batching here and in the other case I just
> spotted to have a detrimental affect on guests running with this patch
> but not using persistent grants. Did you benchmark that case?

I did some tests before.
But I had better produce more detailed results for the different cases.

>> After making netback per-VIF works, this issue can be fixed.
> You've mentioned improvements which are conditional on this work a few
> times I think, perhaps it makes sense to make that change first?

Yes, I did consider implementing per-VIF first, before persistent grants.
But considering that it is part of Wei's patch and combined with other
patches, I decided to implement it later. Making that change first
would make things clearer, though.

Thanks
Annie
> Ian.
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 4/5] ath6kl/wmi.c: eliminate possible double free
From: Kalle Valo @ 2012-11-16 11:17 UTC (permalink / raw)
  To: Julia Lawall
  Cc: kernel-janitors, John W. Linville, linux-wireless, netdev,
	linux-kernel
In-Reply-To: <1350816727-1381-5-git-send-email-Julia.Lawall@lip6.fr>

Hi Julia,

On 10/21/2012 01:52 PM, Julia Lawall wrote:
> From: Julia Lawall <Julia.Lawall@lip6.fr>
> 
> This makes two changes.  In ath6kl_wmi_cmd_send, a call to dev_kfree_skb on
> the skb argument is added to the initial sanity check to more completely
> establish the invariant that ath6kl_wmi_cmd_send owns its skb argument.
> Then, in ath6kl_wmi_sync_point, on failure of the call to
> ath6kl_wmi_cmd_send, the clearing of the local skb variable is moved up, so
> that the error-handling code at the end of the function does not free it
> again.
> 
> A simplified version of the semantic match that finds this problem is as
> follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @r@
> identifier f,free,a;
> parameter list[n] ps;
> type T;
> expression e;
> @@
> 
> f(ps,T a,...) {
>   ... when any
>       when != a = e
>   if(...) { ... free(a); ... return ...; }
>   ... when any
> }
> 
> @@
> identifier r.f,r.free;
> expression x,a;
> expression list[r.n] xs;
> @@
> 
> * x = f(xs,a,...);
>   if (...) { ... free(a); ... return ...; }
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

I think this patch, which is committed to ath6kl.git, has already fixed this.

commit 0616dc1f2bef563d7916c0dcedbb1bff7d9bd80b
Author: Vasanthakumar Thiagarajan <vthiagar@qca.qualcomm.com>
Date:   Tue Aug 14 10:10:33 2012 +0530

    ath6kl: Fix potential skb double free in ath6kl_wmi_sync_point()

    skb given to ath6kl_control_tx() is owned by ath6kl_control_tx().
    Calling function should not free the skb for error cases.
    This is found during code review.

    kvalo: fix a checkpatch warning in ath6kl_wmi_cmd_send()

    Signed-off-by: Vasanthakumar Thiagarajan <vthiagar@qca.qualcomm.com>
    Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>

https://github.com/kvalo/ath6kl/commit/0616dc1f2bef563d7916c0dcedbb1bff7d9bd80b

If you have the time, I would appreciate if you could take a look and
confirm.

Kalle

^ permalink raw reply

* [PATCH v2 3.7.0-rc4] of/net/mdio-gpio: Fix pdev->id issue when using devicetrees.
From: Srinivas KANDAGATLA @ 2012-11-16 10:33 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA, davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ,
	srinivas.kandagatla-qxv4g6HH51o
In-Reply-To: <1352816773-17837-1-git-send-email-srinivas.kandagatla-qxv4g6HH51o@public.gmane.org>

From: Srinivas Kandagatla <srinivas.kandagatla-qxv4g6HH51o@public.gmane.org>

When the mdio-gpio driver is probed via device trees, the platform
device id is set to -1. However, pdev->id is re-used as the bus id
when creating the mdio gpio bus.
So
for the device tree case the mdio-gpio bus name appears as "gpio-ffffffff",
whereas
for the non-device tree case the bus name appears as "gpio-<bus-num>".

This means the bus_id is fixed in the device tree case, so we can't have
two mdio gpio buses via device trees. Assigning a logical bus number
via the device tree solves the problem, and the bus name becomes consistent
with the non-device tree bus name.

Without this patch
1. we can't support two mdio-gpio buses via device trees.
2. we should always pass gpio-ffffffff as bus name to phy_connect, very
different to non-device tree bus name.

So setting up the bus_id via aliases from the device tree is the right
solution, and other drivers do a similar thing.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla-qxv4g6HH51o@public.gmane.org>
---
 .../devicetree/bindings/net/mdio-gpio.txt          |    9 ++++++++-
 drivers/net/phy/mdio-gpio.c                        |   11 +++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.txt b/Documentation/devicetree/bindings/net/mdio-gpio.txt
index bc95495..c79bab0 100644
--- a/Documentation/devicetree/bindings/net/mdio-gpio.txt
+++ b/Documentation/devicetree/bindings/net/mdio-gpio.txt
@@ -8,9 +8,16 @@ gpios property as described in section VIII.1 in the following order:
 
 MDC, MDIO.
 
+Note: Each gpio-mdio bus should have an alias correctly numbered in "aliases"
+node.
+
 Example:
 
-mdio {
+aliases {
+	mdio-gpio0 = <&mdio0>;
+};
+
+mdio0: mdio {
 	compatible = "virtual,mdio-gpio";
 	#address-cells = <1>;
 	#size-cells = <0>;
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 899274f..2ed1140 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -185,17 +185,20 @@ static int __devinit mdio_gpio_probe(struct platform_device *pdev)
 {
 	struct mdio_gpio_platform_data *pdata;
 	struct mii_bus *new_bus;
-	int ret;
+	int ret, bus_id;
 
-	if (pdev->dev.of_node)
+	if (pdev->dev.of_node) {
 		pdata = mdio_gpio_of_get_data(pdev);
-	else
+		bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio");
+	} else {
 		pdata = pdev->dev.platform_data;
+		bus_id = pdev->id;
+	}
 
 	if (!pdata)
 		return -ENODEV;
 
-	new_bus = mdio_gpio_bus_init(&pdev->dev, pdata, pdev->id);
+	new_bus = mdio_gpio_bus_init(&pdev->dev, pdata, bus_id);
 	if (!new_bus)
 		return -ENODEV;
 
-- 
1.7.0.4

^ permalink raw reply related

* Re: [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: Ian Campbell @ 2012-11-16  9:57 UTC (permalink / raw)
  To: Annie Li
  Cc: xen-devel@lists.xensource.com, netdev@vger.kernel.org,
	konrad.wilk@oracle.com
In-Reply-To: <1352962987-541-1-git-send-email-annie.li@oracle.com>

On Thu, 2012-11-15 at 07:03 +0000, Annie Li wrote:
> This patch implements persistent grants for xen-netfront/netback.

Hang on a sec. It has just occurred to me that netfront/netback in the
current mainline kernels don't currently use grant maps at all, they use
grant copy on both the tx and rx paths.

The supposed benefit of persistent grants is to avoid the TLB shootdowns
on grant unmap, but in the current code there should be exactly zero of
those.

If I understand correctly this patch goes from using grant copy
operations to persistently mapping frames and then using memcpy on those
buffers to copy in/out to local buffers. I'm finding it hard to think of
a reason why this should perform any better, do you have a theory which
explains it? (my best theory is that it has a beneficial impact on
the cache locality of the data, but netperf doesn't typically actually
access the data so I'm not sure why that would matter)

Also AIUI this is also doing persistent grants for both Tx and Rx
directions?

For guest Rx does this mean it now copies twice, in dom0 from the DMA
buffer to the guest provided buffer and then again in the guest from the
granted buffer to a normal one?

For guest Tx how do you handle the lifecycle of the grant mapped pages
which are being sent up into the dom0 network stack? Or are you also now
copying twice in this case? (i.e. guest copies into a granted buffer and
dom0 copies out into a local buffer?)

Did you do measurement of the Tx and Rx cases independently? Do you know
that they both benefit from this change (rather than for example an
improvement in one direction masking a regression in the other). Were
the numbers you previously posted in one particular direction or did you
measure both?

Ian.

^ permalink raw reply

* Re: [PATCH 3.7.0-rc4] of/net/mdio-gpio: Fix pdev->id issue when using devicetrees.
From: Srinivas KANDAGATLA @ 2012-11-16  9:54 UTC (permalink / raw)
  To: David Miller, Grant Likely; +Cc: netdev, devicetree-discuss
In-Reply-To: <20121114.185940.648295821386414260.davem@davemloft.net>

On 14/11/12 23:59, David Miller wrote:
> From: Srinivas KANDAGATLA <srinivas.kandagatla@st.com>
> Date: Tue, 13 Nov 2012 14:26:13 +0000
>
>> From: Srinivas Kandagatla <srinivas.kandagatla@st.com>
>>
>> When the mdio-gpio driver is probed via device trees, the platform
>> device id is set as -1, However the id is re-used in the code while
>> creating an mdio bus.
>> So, setting up the id via aliases from device tree is a sensible
>> solution to fix this issue.
>>
>> Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
> This seems rather pointless unless you also update every single device
> tree out there.
>
> Also you need to describe what are the ramifications of this problem
> otherwise it is impossible to figure out how serious this change is.
>
> Does it prevent probing?  Does it cause a crash?
I apologise, I should have explained the full use-case.

use-case is if the mac driver want to connect via phy_connect() to
mdio-gpio phy it would use bus name to do so.

The mdio-gpio phy bus name is formatted as "gpio-<bus-number>:<phy-addr>".
In the existing code the bus number for mdio-gpio, if probed from device
trees, will be set to -1, which results in the bus name being set to
"gpio-ffffffff:<phy-addr>", which is the problem here.
ffffffff is the result of pdev->id being set to -1; it should be set to a
logical number, and this is only possible via aliases.

Having ffffffff as the bus-id also means that we can't have two mdio-gpio
buses via device trees, as they would end up with the same bus-id.
This patch attempts to fix this issue.
So getting the id from alias would be a right choice.

I also agree with Grant's comments about setting up pdev->id.

Will send v2 patch with considering Grant's comments.

>
> Basically, what I'm saying is that this is a very poor submission and
> you need to substantially improve it and communicate better.
>
> If the problem is basically benign, then you should target this change
> to net-next instead of the net tree, along with the necessary dt file
> updates.
I have looked in net-next and there are no dt files which use this driver.

Thanks,
srini
> Thanks.
>
>

^ permalink raw reply

* Re: [Xen-devel] [PATCH 1/4] xen/netback: implements persistent grant with one page pool.
From: ANNIE LI @ 2012-11-16  9:55 UTC (permalink / raw)
  To: Ian Campbell
  Cc: xen-devel@lists.xensource.com, netdev@vger.kernel.org,
	konrad.wilk@oracle.com
In-Reply-To: <1353058074.3499.166.camel@zakaz.uk.xensource.com>



On 2012-11-16 17:27, Ian Campbell wrote:
> On Fri, 2012-11-16 at 02:18 +0000, ANNIE LI wrote:
>> In this patch,
>> The maximum of memory overhead is about
>>
>> (XEN_NETIF_TX_RING_SIZE+XEN_NETIF_RX_RING_SIZE)*PAGE_SIZE  (plus size of grant_ref_t and handle)
>> which is about 512 PAGE_SIZE. Normally, without heavy network offload, this maximum can not be reached.
>>
>> In next patch of splitting tx/rx pool, the maximum is about
> "about" or just "is"?

For only grant pages, it is this value. I took into account other 
element of grant_ref_t and map(change to handle in future)....

>
>>   (256+512)PAGE_SIZE.
> IOW 3MB.
>
>>>> +
>>>> +       return NULL;
>>>> +}
>>>> +
>>>> @@ -1338,7 +1497,11 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
>>>>                   gop->source.domid = vif->domid;
>>>>                   gop->source.offset = txreq.offset;
>>>>
>>>> -               gop->dest.u.gmfn = virt_to_mfn(page_address(page));
>>>> +               if (!vif->persistent_grant)
>>>> +                       gop->dest.u.gmfn = virt_to_mfn(page_address(page));
>>>> +               else
>>>> +                       gop->dest.u.gmfn = (unsigned long)page_address(page);
>>> page_address doesn't return any sort of frame number, does it? This is
>>> rather confusing...
>> Yes. I only use dest.u.gmfn element to save the page_address here for
>> future memcpy, and it does not mean to use frame number actually. To
>> avoid confusion, here I can use
>>
>> gop->dest.u.gmfn = virt_to_mfn(page_address(page));
>>
>> and then call mfn_to_virt when doing memcpy.
> It seems a bit odd to be using the gop structure in this way when you
> aren't actually doing a grant op on it.
>
> While investigating I noticed:
> +static int
> +grant_memory_copy_op(unsigned int cmd, void *vuop, unsigned int count,
> +                    struct xen_netbk *netbk, bool tx_pool)
> ...
> +       struct gnttab_copy *uop = vuop;
>
> Why void *vuop? Why not struct gnttab_copy * in the parameter?

Sorry, my mistake.

>
> I also noticed your new grant_memory_copy_op() seems to have unbatched
> the grant ops in the non-persistent case, which is going to suck for
> performance in non-persistent mode. You need to pull the conditional and
> the HYPERVISOR_grant_table_op outside the loop and pass it full array
> instead of doing them one at a time.

This still connects with netback per-VIF implementation.
Currently, these could not be pulled out outside since netback queue may 
contains persistent and nonpersistent in the same queue. I did consider 
to implement per-VIF first and then the persistent grant,
but thinking of it is part of wei's patch combined with other patches, 
and finally decided to implement per-VIF later.

But this does limit implementation of persistent grant.

Thanks
Annie
>
> Ian
>

^ permalink raw reply

* Re: [Xen-devel] [PATCH 1/4] xen/netback: implements persistent grant with one page pool.
From: Ian Campbell @ 2012-11-16  9:32 UTC (permalink / raw)
  To: ANNIE LI
  Cc: Roger Pau Monne, xen-devel@lists.xensource.com,
	netdev@vger.kernel.org, konrad.wilk@oracle.com
In-Reply-To: <50A5A9CF.8030008@oracle.com>

On Fri, 2012-11-16 at 02:49 +0000, ANNIE LI wrote:
> >
> > Take a look at the following functions from blkback; foreach_grant,
> > add_persistent_gnt and get_persistent_gnt. They are generic functions to
> > deal with persistent grants.
> 
> Ok, thanks.
> Or moving those functions into a separate common file?

Please put them somewhere common.

> > This is highly inefficient, one of the points of using gnttab_set_map_op
> > is that you can queue a bunch of grants, and then map them at the same
> > time using gnttab_map_refs, but here you are using it to map a single
> > grant at a time. You should instead see how much grants you need to map
> > to complete the request and map them all at the same time.
> 
> Yes, it is inefficient here. But this is limited by current netback
> implementation. Current netback is not per-VIF based(not like blkback
> does). After combining persistent grant and non persistent grant
> together, every vif request in the queue may/may not support persistent
> grant. I have to judge whether every vif in the queue supports
> persistent grant or not. If it support, memcpy is used, if not,
> grantcopy is used.

You could (and should) still batch all the grant copies into one
hypercall, e.g. walk the list either doing memcpy or queuing up copyops
as appropriate, then at the end if the queue is non-zero length issue
the hypercall.

I'd expect this lack of batching here and in the other case I just
spotted to have a detrimental effect on guests running with this patch
but not using persistent grants. Did you benchmark that case?

> After making netback per-VIF works, this issue can be fixed.

You've mentioned improvements which are conditional on this work a few
times I think, perhaps it makes sense to make that change first?

Ian.

^ permalink raw reply

* Re: [Xen-devel] [PATCH 1/4] xen/netback: implements persistent grant with one page pool.
From: Ian Campbell @ 2012-11-16  9:27 UTC (permalink / raw)
  To: ANNIE LI
  Cc: xen-devel@lists.xensource.com, netdev@vger.kernel.org,
	konrad.wilk@oracle.com
In-Reply-To: <50A5A285.1030805@oracle.com>

On Fri, 2012-11-16 at 02:18 +0000, ANNIE LI wrote:
> In this patch,
> The maximum of memory overhead is about
> 
> (XEN_NETIF_TX_RING_SIZE+XEN_NETIF_RX_RING_SIZE)*PAGE_SIZE  (plus size of grant_ref_t and handle)
> which is about 512 PAGE_SIZE. Normally, without heavy network offload, this maximum can not be reached.
> 
> In next patch of splitting tx/rx pool, the maximum is about

"about" or just "is"?

>  (256+512)PAGE_SIZE.

IOW 3MB.

> >
> >> +
> >> +       return NULL;
> >> +}
> >> +
> >> @@ -1338,7 +1497,11 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> >>                  gop->source.domid = vif->domid;
> >>                  gop->source.offset = txreq.offset;
> >>
> >> -               gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> >> +               if (!vif->persistent_grant)
> >> +                       gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> >> +               else
> >> +                       gop->dest.u.gmfn = (unsigned long)page_address(page);
> > page_address doesn't return any sort of frame number, does it? This is
> > rather confusing...
> 
> Yes. I only use dest.u.gmfn element to save the page_address here for 
> future memcpy, and it does not mean to use frame number actually. To 
> avoid confusion, here I can use
> 
> gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> 
> and then call mfn_to_virt when doing memcpy.

It seems a bit odd to be using the gop structure in this way when you
aren't actually doing a grant op on it. 

While investigating I noticed:
+static int
+grant_memory_copy_op(unsigned int cmd, void *vuop, unsigned int count,
+                    struct xen_netbk *netbk, bool tx_pool)
...
+       struct gnttab_copy *uop = vuop;

Why void *vuop? Why not struct gnttab_copy * in the parameter?

I also noticed your new grant_memory_copy_op() seems to have unbatched
the grant ops in the non-persistent case, which is going to suck for
performance in non-persistent mode. You need to pull the conditional and
the HYPERVISOR_grant_table_op outside the loop and pass it full array
instead of doing them one at a time.

Ian

^ permalink raw reply

* Re: [PATCH 08/14] xen: netback: Remove redundant check on unsigned variable
From: Ian Campbell @ 2012-11-16  9:16 UTC (permalink / raw)
  To: Tushar Behera
  Cc: linux-kernel@vger.kernel.org, patches@linaro.org,
	xen-devel@lists.xensource.com, netdev@vger.kernel.org
In-Reply-To: <1353048646-10935-9-git-send-email-tushar.behera@linaro.org>

On Fri, 2012-11-16 at 06:50 +0000, Tushar Behera wrote:
> No need to check whether unsigned variable is less than 0.
> 
> CC: Ian Campbell <ian.campbell@citrix.com>
> CC: xen-devel@lists.xensource.com
> CC: netdev@vger.kernel.org
> Signed-off-by: Tushar Behera <tushar.behera@linaro.org>

Acked-by: Ian Campbell <ian.campbell@citrix.com>

Thanks.

> ---
>  drivers/net/xen-netback/netback.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index aab8677..515e10c 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -190,14 +190,14 @@ static int get_page_ext(struct page *pg,
>  
>  	group = ext.e.group - 1;
>  
> -	if (group < 0 || group >= xen_netbk_group_nr)
> +	if (group >= xen_netbk_group_nr)
>  		return 0;
>  
>  	netbk = &xen_netbk[group];
>  
>  	idx = ext.e.idx;
>  
> -	if ((idx < 0) || (idx >= MAX_PENDING_REQS))
> +	if (idx >= MAX_PENDING_REQS)
>  		return 0;
>  
>  	if (netbk->mmap_pages[idx] != pg)

^ permalink raw reply

* [PATCH 4/4] batman-adv: process broadcast packets in BLA earlier
From: Antonio Quartulli @ 2012-11-16  8:49 UTC (permalink / raw)
  To: davem
  Cc: netdev, Simon Wunderlich, Marek Lindner, Sven Eckelmann,
	Antonio Quartulli, Simon Wunderlich
In-Reply-To: <1353055758-2901-1-git-send-email-ordex@autistici.org>

The logic in the BLA mechanism may decide to drop broadcast packets
because the node may still be in the setup phase. For this reason,
further broadcast processing like the early client detection mechanism
must be done only after the BLA check.

This patch moves the invocation of BLA before any other broadcast
processing.

This was introduced by 30cfd02b60e1cb16f5effb0a01f826c5bb7e4c59
("batman-adv: detect not yet announced clients")

Reported-by: Glen Page <glen.page@thet.net>
Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/soft-interface.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index b9a28d2..ce0684a 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -325,6 +325,12 @@ void batadv_interface_rx(struct net_device *soft_iface,
 
 	soft_iface->last_rx = jiffies;
 
+	/* Let the bridge loop avoidance check the packet. If will
+	 * not handle it, we can safely push it up.
+	 */
+	if (batadv_bla_rx(bat_priv, skb, vid, is_bcast))
+		goto out;
+
 	if (orig_node)
 		batadv_tt_add_temporary_global_entry(bat_priv, orig_node,
 						     ethhdr->h_source);
@@ -332,12 +338,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
 	if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest))
 		goto dropped;
 
-	/* Let the bridge loop avoidance check the packet. If will
-	 * not handle it, we can safely push it up.
-	 */
-	if (batadv_bla_rx(bat_priv, skb, vid, is_bcast))
-		goto out;
-
 	netif_rx(skb);
 	goto out;
 
-- 
1.8.0

^ permalink raw reply related

* [PATCH 3/4] batman-adv: don't add TEMP clients belonging to other backbone nodes
From: Antonio Quartulli @ 2012-11-16  8:49 UTC (permalink / raw)
  To: davem
  Cc: netdev, Simon Wunderlich, Marek Lindner, Sven Eckelmann,
	Antonio Quartulli
In-Reply-To: <1353055758-2901-1-git-send-email-ordex@autistici.org>

The "early client detection" mechanism must not add clients belonging
to other backbone nodes. Such clients must be reached by directly
using the LAN instead of the mesh.

This was introduced by 30cfd02b60e1cb16f5effb0a01f826c5bb7e4c59
("batman-adv: detect not yet announced clients")

Reported-by: Glen Page <glen.page@thet.net>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/translation-table.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index fec1a00..baae715 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -2456,6 +2456,13 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
 {
 	bool ret = false;
 
+	/* if the originator is a backbone node (meaning it belongs to the same
+	 * LAN of this node) the temporary client must not be added because to
+	 * reach such destination the node must use the LAN instead of the mesh
+	 */
+	if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig))
+		goto out;
+
 	if (!batadv_tt_global_add(bat_priv, orig_node, addr,
 				  BATADV_TT_CLIENT_TEMP,
 				  atomic_read(&orig_node->last_ttvn)))
-- 
1.8.0

^ permalink raw reply related

* [PATCH 2/4] batman-adv: correctly pass the client flag on tt_response
From: Antonio Quartulli @ 2012-11-16  8:49 UTC (permalink / raw)
  To: davem
  Cc: netdev, Simon Wunderlich, Marek Lindner, Sven Eckelmann,
	Antonio Quartulli
In-Reply-To: <1353055758-2901-1-git-send-email-ordex@autistici.org>

When a TT response with the full table is sent, the client flags
should be sent as well. This patch fixes the flags assignment when
populating the tt_response to send back.

This was introduced by 30cfd02b60e1cb16f5effb0a01f826c5bb7e4c59
("batman-adv: detect not yet announced clients")

Signed-off-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/translation-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 64c0012..fec1a00 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1502,7 +1502,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
 
 			memcpy(tt_change->addr, tt_common_entry->addr,
 			       ETH_ALEN);
-			tt_change->flags = BATADV_NO_FLAGS;
+			tt_change->flags = tt_common_entry->flags;
 
 			tt_count++;
 			tt_change++;
-- 
1.8.0

^ permalink raw reply related

* [PATCH 1/4] batman-adv: fix tt_global_entries flags update
From: Antonio Quartulli @ 2012-11-16  8:49 UTC (permalink / raw)
  To: davem
  Cc: netdev, Simon Wunderlich, Marek Lindner, Sven Eckelmann,
	Antonio Quartulli
In-Reply-To: <1353055758-2901-1-git-send-email-ordex@autistici.org>

Flags carried by a change_entry have to be always copied into the
client entry as they may contain important attributes (e.g.
TT_CLIENT_WIFI).

For instance, a client added by means of the "early detection
mechanism" has no flag set at the beginning, so they must be updated once the
proper ADD event is received.

This was introduced by 30cfd02b60e1cb16f5effb0a01f826c5bb7e4c59
("batman-adv: detect not yet announced clients")

Signed-off-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/translation-table.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 112edd3..64c0012 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -769,6 +769,12 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
 		 */
 		tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_TEMP;
 
+		/* the change can carry possible "attribute" flags like the
+		 * TT_CLIENT_WIFI, therefore they have to be copied in the
+		 * client entry
+		 */
+		tt_global_entry->common.flags |= flags;
+
 		/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
 		 * one originator left in the list and we previously received a
 		 * delete + roaming change for this originator.
-- 
1.8.0

^ permalink raw reply related

* pull request: batman-adv 2012-11-16
From: Antonio Quartulli @ 2012-11-16  8:49 UTC (permalink / raw)
  To: davem
  Cc: netdev, Simon Wunderlich, Marek Lindner, Sven Eckelmann,
	Antonio Quartulli

Hello David,

here is a small set of fixes intended for net/linux-3.7.
These patches fix some interoperability problems due to the features we
added in 3.7. Mainly we have two big issues: one prevents clients connected
to the mesh network from contacting any other hosts, caused by improper
translation table handling; the second one compromises the AP isolation feature,
making it completely useless regardless of whether it is on or off.

Please, let me know if there is any problem.

Thank you very much!
		Antonio



The following changes since commit 80d11788fb8f4d9fcfae5ad508c7f1b65e8b28a3:

  Revert "drivers/net/phy/mdio-bitbang.c: Call mdiobus_unregister before mdiobus_free" (2012-11-14 22:32:15 -0500)

are available in the git repository at:

  git://git.open-mesh.org/linux-merge.git tags/batman-adv-fix-for-davem

for you to fetch changes up to 74490f969155caf1ec945ad2d35d3a8eec6be71d:

  batman-adv: process broadcast packets in BLA earlier (2012-11-16 09:36:54 +0100)

----------------------------------------------------------------
Included fixes are:
- update the client entry status flags when using the "early client
  detection". This makes the Distributed AP isolation correctly work;
- transfer the client entry status flags when recovering the translation
  table from another node. This makes the Distributed AP isolation correctly
  work;
- prevent the "early client detection mechanism" to add clients belonging to
  other backbone nodes in the same LAN. This breaks connectivity when using this
  mechanism together with the Bridge Loop Avoidance
- process broadcast packets with the Bridge Loop Avoidance before any other
  component. BLA can possibly drop the packets based on the source address. This
  makes the "early client detection mechanism" correctly work when used with
  BLA.

----------------------------------------------------------------
Antonio Quartulli (4):
      batman-adv: fix tt_global_entries flags update
      batman-adv: correctly pass the client flag on tt_response
      batman-adv: don't add TEMP clients belonging to other backbone nodes
      batman-adv: process broadcast packets in BLA earlier

 net/batman-adv/soft-interface.c    | 12 ++++++------
 net/batman-adv/translation-table.c | 15 ++++++++++++++-
 2 files changed, 20 insertions(+), 7 deletions(-)

^ permalink raw reply

* Re: [PATCH v4 2/9] net: rds: use this_cpu_* per-cpu helper
From: Shan Wei @ 2012-11-16  8:38 UTC (permalink / raw)
  To: David Miller
  Cc: Shan Wei, venkat.x.venkatsubra, rds-devel, NetDev,
	Kernel-Maillist, cl, Tejun Heo
In-Reply-To: <50A1A7C1.50308@gmail.com>

Shan Wei said, at 2012/11/13 9:52:
> From: Shan Wei <davidshan@tencent.com>
> 
> 
> Signed-off-by: Shan Wei <davidshan@tencent.com>
> Reviewed-by: Christoph Lameter <cl@linux.com>

David Miller,  would you like to pick it up to your net-next tree?


> ---
> v4:
> 1. add missing __percpu annotations.
> 2. [read|write]ing fields of struct rds_ib_cache_head
> using __this_cpu_* operation, drop per_cpu_ptr.
> ---
>  net/rds/ib.h      |    2 +-
>  net/rds/ib_recv.c |   24 +++++++++++++-----------
>  2 files changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/net/rds/ib.h b/net/rds/ib.h
> index 8d2b3d5..7280ab8 100644
> --- a/net/rds/ib.h
> +++ b/net/rds/ib.h
> @@ -50,7 +50,7 @@ struct rds_ib_cache_head {
>  };
>  
>  struct rds_ib_refill_cache {
> -	struct rds_ib_cache_head *percpu;
> +	struct rds_ib_cache_head __percpu *percpu;
>  	struct list_head	 *xfer;
>  	struct list_head	 *ready;
>  };
> diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
> index 8d19491..8c5bc85 100644
> --- a/net/rds/ib_recv.c
> +++ b/net/rds/ib_recv.c
> @@ -418,20 +418,21 @@ static void rds_ib_recv_cache_put(struct list_head *new_item,
>  				 struct rds_ib_refill_cache *cache)
>  {
>  	unsigned long flags;
> -	struct rds_ib_cache_head *chp;
>  	struct list_head *old;
> +	struct list_head __percpu *chpfirst;
>  
>  	local_irq_save(flags);
>  
> -	chp = per_cpu_ptr(cache->percpu, smp_processor_id());
> -	if (!chp->first)
> +	chpfirst = __this_cpu_read(cache->percpu->first);
> +	if (!chpfirst)
>  		INIT_LIST_HEAD(new_item);
>  	else /* put on front */
> -		list_add_tail(new_item, chp->first);
> -	chp->first = new_item;
> -	chp->count++;
> +		list_add_tail(new_item, chpfirst);
>  
> -	if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT)
> +	__this_cpu_write(chpfirst, new_item);
> +	__this_cpu_inc(cache->percpu->count);
> +
> +	if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
>  		goto end;
>  
>  	/*
> @@ -443,12 +444,13 @@ static void rds_ib_recv_cache_put(struct list_head *new_item,
>  	do {
>  		old = xchg(&cache->xfer, NULL);
>  		if (old)
> -			list_splice_entire_tail(old, chp->first);
> -		old = cmpxchg(&cache->xfer, NULL, chp->first);
> +			list_splice_entire_tail(old, chpfirst);
> +		old = cmpxchg(&cache->xfer, NULL, chpfirst);
>  	} while (old);
>  
> -	chp->first = NULL;
> -	chp->count = 0;
> +
> +	__this_cpu_write(chpfirst, NULL);
> +	__this_cpu_write(cache->percpu->count, 0);
>  end:
>  	local_irq_restore(flags);
>  }
> 

^ permalink raw reply

* Re: [PATCH v4 1/9] net: core: use this_cpu_ptr per-cpu helper
From: Shan Wei @ 2012-11-16  8:38 UTC (permalink / raw)
  To: David Miller
  Cc: Shan Wei, timo.teras, steffen.klassert, NetDev, Kernel-Maillist,
	cl, Tejun Heo
In-Reply-To: <50A1A7BA.5000507@gmail.com>

Shan Wei said, at 2012/11/13 9:51:
> From: Shan Wei <davidshan@tencent.com>
> 
> flush_tasklet is a struct, not a pointer in percpu var.
> so use this_cpu_ptr to get the member pointer.
> 
> Signed-off-by: Shan Wei <davidshan@tencent.com>
> Reviewed-by: Christoph Lameter <cl@linux.com>

David Miller,  would you like to pick it up to your net-next tree?

> ---
> no changes vs v3.
> ---
>  net/core/flow.c |    4 +---
>  1 files changed, 1 insertions(+), 3 deletions(-)
> 
> diff --git a/net/core/flow.c b/net/core/flow.c
> index e318c7e..b0901ee 100644
> --- a/net/core/flow.c
> +++ b/net/core/flow.c
> @@ -327,11 +327,9 @@ static void flow_cache_flush_tasklet(unsigned long data)
>  static void flow_cache_flush_per_cpu(void *data)
>  {
>  	struct flow_flush_info *info = data;
> -	int cpu;
>  	struct tasklet_struct *tasklet;
>  
> -	cpu = smp_processor_id();
> -	tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
> +	tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet);
>  	tasklet->data = (unsigned long)info;
>  	tasklet_schedule(tasklet);
>  }
> 

^ permalink raw reply

* Re: [PATCH v4 4/9] net: openvswitch: use this_cpu_ptr per-cpu helper
From: Shan Wei @ 2012-11-16  8:35 UTC (permalink / raw)
  To: jesse-l0M0P4e3n4LQT0dZR+AlfA
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, Tejun Heo,
	cl-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, NetDev, Kernel-Maillist,
	Shan Wei, David Miller
In-Reply-To: <50A1A7D9.2040506-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Shan Wei said, at 2012/11/13 9:52:
> From: Shan Wei <davidshan-1Nz4purKYjRBDgjK7y7TUQ@public.gmane.org>
> 
> just use more faster this_cpu_ptr instead of per_cpu_ptr(p, smp_processor_id());
> 
> 
> Signed-off-by: Shan Wei <davidshan-1Nz4purKYjRBDgjK7y7TUQ@public.gmane.org>
> Reviewed-by: Christoph Lameter <cl-vYTEC60ixJUAvxtiuMwx3w@public.gmane.org>

Jesse Gross,  would you like to pick it up to your tree?

> ---
> no changes vs v3,v2.
> ---
>  net/openvswitch/datapath.c |    4 ++--
>  net/openvswitch/vport.c    |    5 ++---
>  2 files changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index 4c4b62c..77d16a5 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -208,7 +208,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
>  	int error;
>  	int key_len;
>  
> -	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
> +	stats = this_cpu_ptr(dp->stats_percpu);
>  
>  	/* Extract flow from 'skb' into 'key'. */
>  	error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
> @@ -282,7 +282,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
>  	return 0;
>  
>  err:
> -	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
> +	stats = this_cpu_ptr(dp->stats_percpu);
>  
>  	u64_stats_update_begin(&stats->sync);
>  	stats->n_lost++;
> diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
> index 03779e8..70af0be 100644
> --- a/net/openvswitch/vport.c
> +++ b/net/openvswitch/vport.c
> @@ -333,8 +333,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
>  {
>  	struct vport_percpu_stats *stats;
>  
> -	stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
> -
> +	stats = this_cpu_ptr(vport->percpu_stats);
>  	u64_stats_update_begin(&stats->sync);
>  	stats->rx_packets++;
>  	stats->rx_bytes += skb->len;
> @@ -359,7 +358,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
>  	if (likely(sent)) {
>  		struct vport_percpu_stats *stats;
>  
> -		stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
> +		stats = this_cpu_ptr(vport->percpu_stats);
>  
>  		u64_stats_update_begin(&stats->sync);
>  		stats->tx_packets++;
> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox