Netdev List
 help / color / mirror / Atom feed
* [PATCH 2/9] sysctl: use ctl_header_cookie in proc_handler
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

TODO: if this patch series gets positive feedback, this patch will be
extended with a kernel-wide change to every proc_handler to add a
'cookie' argument.

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 fs/proc/proc_sysctl.c  |   11 ++++++++++-
 include/linux/sysctl.h |    3 +++
 2 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92..85b6b75 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -135,6 +135,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct ctl_table_header *head = grab_header(inode);
 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+	proc_handler_cookie *phc = (proc_handler_cookie *) table->proc_handler;
 	ssize_t error;
 	size_t res;
 
@@ -156,7 +157,15 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 
 	/* careful: calling conventions are nasty here */
 	res = count;
-	error = table->proc_handler(table, write, buf, &res, ppos);
+	/*XXX Most handlers only use the first 5 arguments (without
+	 *XXX @cookie). Changing all handlers is too much work,
+	 *XXX as this is only an RFC patch at the moment.
+	 *XXX
+	 *XXX This is just a HACK for now; I did it this way to avoid
+	 *XXX wasting time changing all the handlers. In the final version
+	 *XXX I'll change all the handlers if there's no other solution.
+	 */
+	error = phc(table, write, buf, &res, ppos, head->ctl_header_cookie);
 	if (!error)
 		error = res;
 out:
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 43fed29..3d21832 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -963,6 +963,9 @@ typedef struct ctl_table ctl_table;
 
 typedef int proc_handler (struct ctl_table *ctl, int write,
 			  void __user *buffer, size_t *lenp, loff_t *ppos);
+typedef int proc_handler_cookie(struct ctl_table *ctl, int write,
+				void __user *buffer, size_t *lenp,
+				loff_t *ppos, void *ctl_header_cookie);
 
 extern int proc_dostring(struct ctl_table *, int,
 			 void __user *, size_t *, loff_t *);
-- 
1.7.4.rc1.7.g2cf08.dirty

^ permalink raw reply related

* [PATCH 3/9] sysctl: add netns_proc_dointvec and similar handlers
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 include/net/net_namespace.h |   26 ++++++++++++++++++++++++++
 net/sysctl_net.c            |   31 +++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 0 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1bf812b..0b7d37d 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -276,4 +276,30 @@ extern struct ctl_table_header *register_net_sysctl_rotable(
 	const struct ctl_path *path, struct ctl_table *table);
 extern void unregister_net_sysctl_table(struct ctl_table_header *header);
 
+/* similar to the versions without 'netns', with these remarks:
+ * - these handlers receive as cookie a 'struct net*'
+ * - the data field of ctl_table* must be of the form
+ *    &init_net.member1.member2..memberN
+ * - these handlers will call their equivalent handler with a
+ *   ctl_table with data of the form: net->member1.member2..memberN
+ */
+extern int netns_proc_dostring(struct ctl_table *,
+		int, void __user *, size_t *, loff_t *, void *net);
+extern int netns_proc_dointvec(struct ctl_table *, int,
+		void __user *, size_t *, loff_t *, void *net);
+extern int netns_proc_dointvec_minmax(struct ctl_table *, int,
+		void __user *, size_t *, loff_t *, void *net);
+extern int netns_proc_dointvec_jiffies(struct ctl_table *, int,
+		void __user *, size_t *, loff_t *, void *net);
+extern int netns_proc_dointvec_userhz_jiffies(struct ctl_table *, int,
+		void __user *, size_t *, loff_t *, void *net);
+extern int netns_proc_dointvec_ms_jiffies(struct ctl_table *, int,
+		void __user *, size_t *, loff_t *, void *net);
+extern int netns_proc_doulongvec_minmax(struct ctl_table *, int,
+		void __user *, size_t *, loff_t *, void *net);
+extern int netns_proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
+		void __user *, size_t *, loff_t *, void *net);
+extern int netns_proc_do_large_bitmap(struct ctl_table *, int,
+		void __user *, size_t *, loff_t *, void *net);
+
 #endif /* __NET_NET_NAMESPACE_H */
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9dadd17..60b36ad 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -127,3 +127,34 @@ void unregister_net_sysctl_table(struct ctl_table_header *header)
 	unregister_sysctl_table(header);
 }
 EXPORT_SYMBOL_GPL(unregister_net_sysctl_table);
+
+
+
+static int netns_proc_wrapper(struct ctl_table *table, int write,
+			      void __user *buffer, size_t *lenp,
+			      loff_t *ppos, void *net, proc_handler proc_handler)
+{
+	struct ctl_table tmp = *table;
+	tmp.data += (char *)net - (char *)&init_net;
+	return ((proc_handler_cookie*) proc_handler)(&tmp, write, buffer, lenp, ppos, NULL);
+}
+
+
+#define NETNS_PROC_WRAP(handler_name)					\
+	int netns_##handler_name(struct ctl_table *table, int write,	\
+				 void __user *buffer, size_t *lenp,	\
+				 loff_t *ppos, void *net)		\
+	{								\
+		return netns_proc_wrapper(table, write, buffer, lenp,	\
+					  ppos, net, handler_name);	\
+	}								\
+	EXPORT_SYMBOL_GPL(netns_##handler_name);
+
+NETNS_PROC_WRAP(proc_dointvec);
+NETNS_PROC_WRAP(proc_dointvec_minmax);
+NETNS_PROC_WRAP(proc_dointvec_jiffies);
+NETNS_PROC_WRAP(proc_dointvec_userhz_jiffies);
+NETNS_PROC_WRAP(proc_dointvec_ms_jiffies);
+NETNS_PROC_WRAP(proc_doulongvec_minmax)
+NETNS_PROC_WRAP(proc_doulongvec_ms_jiffies_minmax);
+NETNS_PROC_WRAP(proc_do_large_bitmap);
-- 
1.7.4.rc1.7.g2cf08.dirty

^ permalink raw reply related

* [PATCH 4/9] sysctl: ipv4: ipfrag: share ip4_frags_ns_ctl_table between nets
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

The only reason we were creating a copy of this table was to set
->data to point to data from within the newly created net. The
netns_proc_* handlers do this dynamically.

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 net/ipv4/ip_fragment.c |   34 ++++++----------------------------
 net/sysctl_net.c       |    2 +-
 2 files changed, 7 insertions(+), 29 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a1151b8..ffca3cc 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -677,21 +677,21 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
 		.data		= &init_net.ipv4.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{
 		.procname	= "ipfrag_low_thresh",
 		.data		= &init_net.ipv4.frags.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{
 		.procname	= "ipfrag_time",
 		.data		= &init_net.ipv4.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
+		.proc_handler	= (proc_handler *) netns_proc_dointvec_jiffies,
 	},
 	{ }
 };
@@ -717,41 +717,19 @@ static struct ctl_table ip4_frags_ctl_table[] = {
 
 static int __net_init ip4_frags_ns_ctl_register(struct net *net)
 {
-	struct ctl_table *table;
 	struct ctl_table_header *hdr;
-
-	table = ip4_frags_ns_ctl_table;
-	if (!net_eq(net, &init_net)) {
-		table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
-		if (table == NULL)
-			goto err_alloc;
-
-		table[0].data = &net->ipv4.frags.high_thresh;
-		table[1].data = &net->ipv4.frags.low_thresh;
-		table[2].data = &net->ipv4.frags.timeout;
-	}
-
-	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
+	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path,
+					ip4_frags_ns_ctl_table);
 	if (hdr == NULL)
-		goto err_reg;
+		return -ENOMEM;
 
 	net->ipv4.frags_hdr = hdr;
 	return 0;
-
-err_reg:
-	if (!net_eq(net, &init_net))
-		kfree(table);
-err_alloc:
-	return -ENOMEM;
 }
 
 static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
 {
-	struct ctl_table *table;
-
-	table = net->ipv4.frags_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->ipv4.frags_hdr);
-	kfree(table);
 }
 
 static void ip4_frags_ctl_register(void)
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 60b36ad..d80e9c4 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -110,7 +110,7 @@ struct ctl_table_header *register_net_sysctl_table(struct net *net,
 	namespaces = *current->nsproxy;
 	namespaces.net_ns = net;
 	return __register_sysctl_paths(&net_sysctl_root, &namespaces, path,
-				       table, NULL);
+				       table, net);
 }
 EXPORT_SYMBOL_GPL(register_net_sysctl_table);
 
-- 
1.7.4.rc1.7.g2cf08.dirty

^ permalink raw reply related

* [PATCH 6/9] sysctl: route: share ipv4_route_flush_table between nets
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 net/ipv4/route.c |   36 +++++++-----------------------------
 1 files changed, 7 insertions(+), 29 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6ed6603..8fd0208 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3038,19 +3038,18 @@ void ip_rt_multicast_event(struct in_device *in_dev)
 
 #ifdef CONFIG_SYSCTL
 static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
-					void __user *buffer,
-					size_t *lenp, loff_t *ppos)
+				     void __user *buffer,
+				     size_t *lenp, loff_t *ppos, void *cookie)
 {
 	if (write) {
 		int flush_delay;
 		ctl_table ctl;
-		struct net *net;
+		struct net *net = (struct net *) cookie;
 
 		memcpy(&ctl, __ctl, sizeof(ctl));
 		ctl.data = &flush_delay;
 		proc_dointvec(&ctl, write, buffer, lenp, ppos);
 
-		net = (struct net *)__ctl->extra1;
 		rt_cache_flush(net, flush_delay);
 		return 0;
 	}
@@ -3191,7 +3190,7 @@ static struct ctl_table ipv4_route_flush_table[] = {
 		.procname	= "flush",
 		.maxlen		= sizeof(int),
 		.mode		= 0200,
-		.proc_handler	= ipv4_sysctl_rtcache_flush,
+		.proc_handler	= (proc_handler *) ipv4_sysctl_rtcache_flush,
 	},
 	{ },
 };
@@ -3205,37 +3204,16 @@ static __net_initdata struct ctl_path ipv4_route_path[] = {
 
 static __net_init int sysctl_route_net_init(struct net *net)
 {
-	struct ctl_table *tbl;
-
-	tbl = ipv4_route_flush_table;
-	if (!net_eq(net, &init_net)) {
-		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
-		if (tbl == NULL)
-			goto err_dup;
-	}
-	tbl[0].extra1 = net;
-
-	net->ipv4.route_hdr =
-		register_net_sysctl_table(net, ipv4_route_path, tbl);
+	net->ipv4.route_hdr = register_net_sysctl_table(net,
+				ipv4_route_path, ipv4_route_flush_table);
 	if (net->ipv4.route_hdr == NULL)
-		goto err_reg;
+		return -ENOMEM;
 	return 0;
-
-err_reg:
-	if (tbl != ipv4_route_flush_table)
-		kfree(tbl);
-err_dup:
-	return -ENOMEM;
 }
 
 static __net_exit void sysctl_route_net_exit(struct net *net)
 {
-	struct ctl_table *tbl;
-
-	tbl = net->ipv4.route_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->ipv4.route_hdr);
-	BUG_ON(tbl == ipv4_route_flush_table);
-	kfree(tbl);
 }
 
 static __net_initdata struct pernet_operations sysctl_route_ops = {
-- 
1.7.4.rc1.7.g2cf08.dirty

^ permalink raw reply related

* [PATCH 7/9] sysctl: ipv4: share ipv4_net_table between nets
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 net/ipv4/sysctl_net_ipv4.c |   53 +++++++------------------------------------
 1 files changed, 9 insertions(+), 44 deletions(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1a45665..6fd3279 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -636,49 +636,49 @@ static struct ctl_table ipv4_net_table[] = {
 		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_all,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{
 		.procname	= "icmp_echo_ignore_broadcasts",
 		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{
 		.procname	= "icmp_ignore_bogus_error_responses",
 		.data		= &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{
 		.procname	= "icmp_errors_use_inbound_ifaddr",
 		.data		= &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{
 		.procname	= "icmp_ratelimit",
 		.data		= &init_net.ipv4.sysctl_icmp_ratelimit,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_ms_jiffies,
+		.proc_handler	= (proc_handler *) netns_proc_dointvec_ms_jiffies,
 	},
 	{
 		.procname	= "icmp_ratemask",
 		.data		= &init_net.ipv4.sysctl_icmp_ratemask,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{
 		.procname	= "rt_cache_rebuild_count",
 		.data		= &init_net.ipv4.sysctl_rt_cache_rebuild_count,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{ }
 };
@@ -692,53 +692,18 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
 
 static __net_init int ipv4_sysctl_init_net(struct net *net)
 {
-	struct ctl_table *table;
-
-	table = ipv4_net_table;
-	if (!net_eq(net, &init_net)) {
-		table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
-		if (table == NULL)
-			goto err_alloc;
-
-		table[0].data =
-			&net->ipv4.sysctl_icmp_echo_ignore_all;
-		table[1].data =
-			&net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
-		table[2].data =
-			&net->ipv4.sysctl_icmp_ignore_bogus_error_responses;
-		table[3].data =
-			&net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr;
-		table[4].data =
-			&net->ipv4.sysctl_icmp_ratelimit;
-		table[5].data =
-			&net->ipv4.sysctl_icmp_ratemask;
-		table[6].data =
-			&net->ipv4.sysctl_rt_cache_rebuild_count;
-	}
-
 	net->ipv4.sysctl_rt_cache_rebuild_count = 4;
 
 	net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
-			net_ipv4_ctl_path, table);
+			net_ipv4_ctl_path, ipv4_net_table);
 	if (net->ipv4.ipv4_hdr == NULL)
-		goto err_reg;
-
+		return -ENOMEM;
 	return 0;
-
-err_reg:
-	if (!net_eq(net, &init_net))
-		kfree(table);
-err_alloc:
-	return -ENOMEM;
 }
 
 static __net_exit void ipv4_sysctl_exit_net(struct net *net)
 {
-	struct ctl_table *table;
-
-	table = net->ipv4.ipv4_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
-	kfree(table);
 }
 
 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
-- 
1.7.4.rc1.7.g2cf08.dirty

^ permalink raw reply related

* [PATCH 8/9] sysctl: ipv6: share ip6_frags_ns_ctl_table between nets
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 net/ipv6/reassembly.c |   34 ++++++----------------------------
 1 files changed, 6 insertions(+), 28 deletions(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 07beeb0..868cbd5 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -600,21 +600,21 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
 		.data		= &init_net.ipv6.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec,
 	},
 	{
 		.procname	= "ip6frag_low_thresh",
 		.data		= &init_net.ipv6.frags.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec,
 	},
 	{
 		.procname	= "ip6frag_time",
 		.data		= &init_net.ipv6.frags.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
+		.proc_handler	= (proc_handler *) netns_proc_dointvec_jiffies,
 	},
 	{ }
 };
@@ -632,42 +632,20 @@ static struct ctl_table ip6_frags_ctl_table[] = {
 
 static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 {
-	struct ctl_table *table;
 	struct ctl_table_header *hdr;
 
-	table = ip6_frags_ns_ctl_table;
-	if (!net_eq(net, &init_net)) {
-		table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
-		if (table == NULL)
-			goto err_alloc;
-
-		table[0].data = &net->ipv6.frags.high_thresh;
-		table[1].data = &net->ipv6.frags.low_thresh;
-		table[2].data = &net->ipv6.frags.timeout;
-	}
-
-	hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
+	hdr = register_net_sysctl_table(net, net_ipv6_ctl_path,
+					ip6_frags_ns_ctl_table);
 	if (hdr == NULL)
-		goto err_reg;
+		return -ENOMEM;
 
 	net->ipv6.sysctl.frags_hdr = hdr;
 	return 0;
-
-err_reg:
-	if (!net_eq(net, &init_net))
-		kfree(table);
-err_alloc:
-	return -ENOMEM;
 }
 
 static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
 {
-	struct ctl_table *table;
-
-	table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
-	if (!net_eq(net, &init_net))
-		kfree(table);
 }
 
 static struct ctl_table_header *ip6_ctl_header;
-- 
1.7.4.rc1.7.g2cf08.dirty

^ permalink raw reply related

* [PATCH 5/9] sysctl: net: share netns_core_table between nets
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 net/core/sysctl_net_core.c |   28 +++-------------------------
 1 files changed, 3 insertions(+), 25 deletions(-)

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 385b609..e5a1544 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -182,7 +182,7 @@ static struct ctl_table netns_core_table[] = {
 		.data		= &init_net.core.sysctl_somaxconn,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{ }
 };
@@ -195,41 +195,19 @@ __net_initdata struct ctl_path net_core_path[] = {
 
 static __net_init int sysctl_core_net_init(struct net *net)
 {
-	struct ctl_table *tbl;
-
 	net->core.sysctl_somaxconn = SOMAXCONN;
 
-	tbl = netns_core_table;
-	if (!net_eq(net, &init_net)) {
-		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
-		if (tbl == NULL)
-			goto err_dup;
-
-		tbl[0].data = &net->core.sysctl_somaxconn;
-	}
-
 	net->core.sysctl_hdr = register_net_sysctl_table(net,
-			net_core_path, tbl);
+			net_core_path, netns_core_table);
 	if (net->core.sysctl_hdr == NULL)
-		goto err_reg;
+		return -ENOMEM;
 
 	return 0;
-
-err_reg:
-	if (tbl != netns_core_table)
-		kfree(tbl);
-err_dup:
-	return -ENOMEM;
 }
 
 static __net_exit void sysctl_core_net_exit(struct net *net)
 {
-	struct ctl_table *tbl;
-
-	tbl = net->core.sysctl_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->core.sysctl_hdr);
-	BUG_ON(tbl == netns_core_table);
-	kfree(tbl);
 }
 
 static __net_initdata struct pernet_operations sysctl_core_ops = {
-- 
1.7.4.rc1.7.g2cf08.dirty


^ permalink raw reply related

* [PATCH 9/9] sysctl: ipv6: share ip6_ctl_table, ipv6_icmp_table and ipv6_route_table between nets
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

This patch includes another implementation of the patch from [1]. This
patch will not apply cleanly if that one has been applied.

[1] http://thread.gmane.org/gmane.linux.network/187273

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 include/net/ipv6.h         |    6 +---
 net/ipv6/icmp.c            |   17 +-----------
 net/ipv6/route.c           |   54 +++++++++++----------------------------
 net/ipv6/sysctl_net_ipv6.c |   61 ++++++--------------------------------------
 4 files changed, 27 insertions(+), 111 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 96e50e0..1526ed6 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -652,11 +652,9 @@ static inline int snmp6_unregister_dev(struct inet6_dev *idev) { return 0; }
 #endif
 
 #ifdef CONFIG_SYSCTL
-extern ctl_table ipv6_route_table_template[];
-extern ctl_table ipv6_icmp_table_template[];
+extern ctl_table ipv6_route_table[];
+extern ctl_table ipv6_icmp_table[];
 
-extern struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
-extern struct ctl_table *ipv6_route_sysctl_init(struct net *net);
 extern int ipv6_sysctl_register(void);
 extern void ipv6_sysctl_unregister(void);
 extern int ipv6_static_sysctl_register(void);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 03e62f9..924cb36 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -938,29 +938,16 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
 EXPORT_SYMBOL(icmpv6_err_convert);
 
 #ifdef CONFIG_SYSCTL
-ctl_table ipv6_icmp_table_template[] = {
+ctl_table ipv6_icmp_table[] = {
 	{
 		.procname	= "ratelimit",
 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_ms_jiffies,
+		.proc_handler	= (proc_handler *) netns_proc_dointvec_ms_jiffies,
 	},
 	{ },
 };
 
-struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = kmemdup(ipv6_icmp_table_template,
-			sizeof(ipv6_icmp_table_template),
-			GFP_KERNEL);
-
-	if (table)
-		table[0].data = &net->ipv6.sysctl.icmpv6_time;
-
-	return table;
-}
 #endif
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a998db6..29e05ca 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2553,11 +2553,11 @@ static const struct file_operations rt6_stats_seq_fops = {
 
 #ifdef CONFIG_SYSCTL
 
-static
-int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
-			      void __user *buffer, size_t *lenp, loff_t *ppos)
+static int netns_ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
+					   void __user *buffer, size_t *lenp,
+					   loff_t *ppos, void *cookie)
 {
-	struct net *net = current->nsproxy->net_ns;
+	struct net *net = (struct net *) cookie;
 	int delay = net->ipv6.sysctl.flush_delay;
 	if (write) {
 		proc_dointvec(ctl, write, buffer, lenp, ppos);
@@ -2567,103 +2567,79 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
 		return -EINVAL;
 }
 
-ctl_table ipv6_route_table_template[] = {
+ctl_table ipv6_route_table[] = {
 	{
 		.procname	=	"flush",
 		.data		=	&init_net.ipv6.sysctl.flush_delay,
 		.maxlen		=	sizeof(int),
 		.mode		=	0200,
-		.proc_handler	=	ipv6_sysctl_rtcache_flush
+		.proc_handler	=	(proc_handler *) netns_ipv6_sysctl_rtcache_flush
 	},
 	{
 		.procname	=	"gc_thresh",
 		.data		=	&ip6_dst_ops_template.gc_thresh,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-		.proc_handler	=	proc_dointvec,
+		.proc_handler	=	(proc_handler *) netns_proc_dointvec,
 	},
 	{
 		.procname	=	"max_size",
 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-		.proc_handler	=	proc_dointvec,
+		.proc_handler	=	(proc_handler *) netns_proc_dointvec,
 	},
 	{
 		.procname	=	"gc_min_interval",
 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-		.proc_handler	=	proc_dointvec_jiffies,
+		.proc_handler	=	(proc_handler *) netns_proc_dointvec_jiffies,
 	},
 	{
 		.procname	=	"gc_timeout",
 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-		.proc_handler	=	proc_dointvec_jiffies,
+		.proc_handler	=	(proc_handler *) netns_proc_dointvec_jiffies,
 	},
 	{
 		.procname	=	"gc_interval",
 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-		.proc_handler	=	proc_dointvec_jiffies,
+		.proc_handler	=	(proc_handler *) netns_proc_dointvec_jiffies,
 	},
 	{
 		.procname	=	"gc_elasticity",
 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-		.proc_handler	=	proc_dointvec,
+		.proc_handler	=	(proc_handler *) netns_proc_dointvec,
 	},
 	{
 		.procname	=	"mtu_expires",
 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-		.proc_handler	=	proc_dointvec_jiffies,
+		.proc_handler	=	(proc_handler *) netns_proc_dointvec_jiffies,
 	},
 	{
 		.procname	=	"min_adv_mss",
 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-		.proc_handler	=	proc_dointvec,
+		.proc_handler	=	(proc_handler *) netns_proc_dointvec,
 	},
 	{
 		.procname	=	"gc_min_interval_ms",
 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
 		.maxlen		=	sizeof(int),
 		.mode		=	0644,
-		.proc_handler	=	proc_dointvec_ms_jiffies,
+		.proc_handler	=	(proc_handler *) netns_proc_dointvec_ms_jiffies,
 	},
 	{ }
 };
-
-struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
-{
-	struct ctl_table *table;
-
-	table = kmemdup(ipv6_route_table_template,
-			sizeof(ipv6_route_table_template),
-			GFP_KERNEL);
-
-	if (table) {
-		table[0].data = &net->ipv6.sysctl.flush_delay;
-		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
-		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
-		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
-		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
-		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
-		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
-		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
-		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
-		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
-	}
-
-	return table;
-}
 #endif
 
 static int __net_init ip6_route_net_init(struct net *net)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7cb65ef..cd15483 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,25 +17,25 @@
 
 static struct ctl_table empty[1];
 
-static ctl_table ipv6_table_template[] = {
+static ctl_table ipv6_table[] = {
 	{
 		.procname	= "route",
 		.maxlen		= 0,
 		.mode		= 0555,
-		.child		= ipv6_route_table_template
+		.child		= ipv6_route_table
 	},
 	{
 		.procname	= "icmp",
 		.maxlen		= 0,
 		.mode		= 0555,
-		.child		= ipv6_icmp_table_template
+		.child		= ipv6_icmp_table
 	},
 	{
 		.procname	= "bindv6only",
 		.data		= &init_net.ipv6.sysctl.bindv6only,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= (proc_handler *) netns_proc_dointvec
 	},
 	{
 		.procname	= "neigh",
@@ -66,62 +66,17 @@ EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
 
 static int __net_init ipv6_sysctl_net_init(struct net *net)
 {
-	struct ctl_table *ipv6_table;
-	struct ctl_table *ipv6_route_table;
-	struct ctl_table *ipv6_icmp_table;
-	int err;
-
-	err = -ENOMEM;
-	ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
-			     GFP_KERNEL);
-	if (!ipv6_table)
-		goto out;
-
-	ipv6_route_table = ipv6_route_sysctl_init(net);
-	if (!ipv6_route_table)
-		goto out_ipv6_table;
-	ipv6_table[0].child = ipv6_route_table;
-
-	ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
-	if (!ipv6_icmp_table)
-		goto out_ipv6_route_table;
-	ipv6_table[1].child = ipv6_icmp_table;
-
-	ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
-
-	net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
-							   ipv6_table);
+	net->ipv6.sysctl.table = register_net_sysctl_table(net,
+				   net_ipv6_ctl_path, ipv6_table);
 	if (!net->ipv6.sysctl.table)
-		goto out_ipv6_icmp_table;
-
-	err = 0;
-out:
-	return err;
+		return -ENOMEM;
 
-out_ipv6_icmp_table:
-	kfree(ipv6_icmp_table);
-out_ipv6_route_table:
-	kfree(ipv6_route_table);
-out_ipv6_table:
-	kfree(ipv6_table);
-	goto out;
+	return 0;
 }
 
 static void __net_exit ipv6_sysctl_net_exit(struct net *net)
 {
-	struct ctl_table *ipv6_table;
-	struct ctl_table *ipv6_route_table;
-	struct ctl_table *ipv6_icmp_table;
-
-	ipv6_table = net->ipv6.sysctl.table->ctl_table_arg;
-	ipv6_route_table = ipv6_table[0].child;
-	ipv6_icmp_table = ipv6_table[1].child;
-
 	unregister_net_sysctl_table(net->ipv6.sysctl.table);
-
-	kfree(ipv6_table);
-	kfree(ipv6_route_table);
-	kfree(ipv6_icmp_table);
 }
 
 static struct pernet_operations ipv6_sysctl_net_ops = {
-- 
1.7.4.rc1.7.g2cf08.dirty


^ permalink raw reply related

* Re: [Bugme-new] [Bug 29712] New: Bonding Driver(version : 3.5.0) - Problem with ARP monitoring in active backup mode
From: David Miller @ 2011-02-25 18:54 UTC (permalink / raw)
  To: Harsha.R02
  Cc: brian.haley, akpm, bugzilla-daemon, bugme-daemon, netdev, fubar
In-Reply-To: <E351E450E8B9F54684A699D42DC5ADF210062FA2@MPBAGVEX02.corp.mphasis.com>

From: "Harsha R02" <Harsha.R02@mphasis.com>
Date: Fri, 25 Feb 2011 18:14:32 +0530

> Attached patch resolves the issue. Failover happened back to primary
> when it was up again in both the point to point and switch
> configuration.
> 
> Please let us know if this change can be included.

Please don't base64 encode your patches, that makes them harder
to read for some people.  It's just plain text.

^ permalink raw reply

* Re: [PATCH] sysctl: ipv6: use correct net in ipv6_sysctl_rtcache_flush
From: David Miller @ 2011-02-25 19:02 UTC (permalink / raw)
  To: daniel.lezcano; +Cc: lucian.grijincu, netdev, benjamin.thery
In-Reply-To: <4D67CB11.4020801@free.fr>

From: Daniel Lezcano <daniel.lezcano@free.fr>
Date: Fri, 25 Feb 2011 16:30:25 +0100

> On 02/25/2011 06:48 AM, Lucian Adrian Grijincu wrote:
>> Before this patch issuing these commands:
>>
>>    fd = open("/proc/sys/net/ipv6/route/flush")
>>    unshare(CLONE_NEWNET)
>>    write(fd, "stuff")
>>
>> would flush the newly created net, not the original one.
>>
>> The equivalent ipv4 code is correct (stores the net inside ->extra1).
>> ---
> 
> Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH] don't allow CAP_NET_ADMIN to load non-netdev kernel modules
From: Vasiliy Kulikov @ 2011-02-25 19:02 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-kernel, kuznet, pekkas, jmorris, yoshfuji, kaber,
	eric.dumazet, therbert, xiaosuo, jesse, Kees Cook, Eugene Teo,
	Dan Rosenberg, Andrew Morton
In-Reply-To: <20110225.104720.71110261.davem@davemloft.net>

On Fri, Feb 25, 2011 at 10:47 -0800, David Miller wrote:
> From: Vasiliy Kulikov <segoon@openwall.com>
> Date: Fri, 25 Feb 2011 18:14:14 +0300
> 
> > Since a8f80e8ff94ecba629542d9b4b5f5a8ee3eb565c any process with
> > CAP_NET_ADMIN may load any module from /lib/modules/.  This doesn't mean
> > that CAP_NET_ADMIN is a superset of CAP_SYS_MODULE as modules are limited
> > to /lib/modules/**.  However, CAP_NET_ADMIN capability shouldn't allow
> > anybody load any module not related to networking.
> 
> Why go through this naming change, which does break things, instead of
> simply adding a capability mask tag or similar to modules somehow.  You
> could stick it into a special elf section or similar.
>
> Doesn't that make tons more sense than this?

This is not "simple": adding a special section for a single workaround
seems like overkill to me - it touches the core (modules'
internals), which is not related to the initial CAP_* problem at all.

I'd be happy with not breaking anything, but I don't see any acceptable
solution.


Thanks,

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply

* Re: [Bugme-new] [Bug 29712] New: Bonding Driver(version : 3.5.0) - Problem with ARP monitoring in active backup mode
From: Jay Vosburgh @ 2011-02-25 19:02 UTC (permalink / raw)
  To: Harsha R02
  Cc: Brian Haley, Andrew Morton, bugzilla-daemon, bugme-daemon, netdev
In-Reply-To: <E351E450E8B9F54684A699D42DC5ADF20C6F1D4A@MPBAGVEX02.corp.mphasis.com>

Harsha R02 <Harsha.R02@mphasis.com> wrote:

>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 40fb5ee..0413917 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -3020,11 +3020,16 @@ static void bond_ab_arp_probe(struct bonding *bond)
>                       bond->curr_active_slave->dev->name);
>        if (bond->curr_active_slave) {
>+                if((bond->curr_active_slave != bond->primary_slave) &&
>+                   (IS_UP(bond->primary_slave->dev)))
>+                        goto failover;
>+
>                bond_arp_send_all(bond, bond->curr_active_slave);
>                read_unlock(&bond->curr_slave_lock);
>                return;
>        }
>+failover:
>        read_unlock(&bond->curr_slave_lock);
>        /* if we don't have a curr_active_slave, search for the next available

	I'm not sure this is the proper place to put the "failover:"
label, as it will go through the "search for any peer" logic that's
normally used when there are no available slaves.  That will likely take
longer than simply switching to the primary.

	It should be possible to simply call bond_change_active_slave
with the appropriate arguments; did you try this?

	-J


>-------------------------------------------------------------------------------
>From: Harsha R02
>Sent: Fri 2/25/2011 6:14 PM
>To: Brian Haley; Andrew Morton
>Cc: bugzilla-daemon@bugzilla.kernel.org; bugme-daemon@bugzilla.kernel.org;
>netdev@vger.kernel.org; Jay Vosburgh
>Subject: RE: [Bugme-new] [Bug 29712] New: Bonding Driver(version : 3.5.0) -
>Problem with ARP monitoring in active backup mode
>
>Attached patch resolves the issue. Failover happened back to primary when it
>was up again in both the point to point and switch configuration.
>
>Please let us know if this change can be included.
>
>Thanks,
>
>- Harsha
>
>-----Original Message-----
>From: Brian Haley [mailto:brian.haley@hp.com]
>Sent: Friday, February 25, 2011 9:12 AM
>To: Andrew Morton
>Cc: Harsha R02; bugzilla-daemon@bugzilla.kernel.org;
>bugme-daemon@bugzilla.kernel.org; netdev@vger.kernel.org; Jay Vosburgh
>Subject: Re: [Bugme-new] [Bug 29712] New: Bonding Driver(version : 3.5.0) -
>Problem with ARP monitoring in active backup mode
>
>On 02/24/2011 05:51 PM, Andrew Morton wrote:
>> (switched to email.  Please respond via emailed reply-to-all, not via the
>> bugzilla web interface).
>>
>> On Wed, 23 Feb 2011 10:41:34 GMT
>> bugzilla-daemon@bugzilla.kernel.org wrote:
>>
>>> https://bugzilla.kernel.org/show_bug.cgi?id=29712
>>>
>>>            Summary: Bonding Driver(version : 3.5.0) - Problem with ARP
>>>                     monitoring in active backup mode
>>>            Product: Drivers
>>>            Version: 2.5
>>>     Kernel Version: 2.6.32
>>
>> That's a paleolithic kernel you have there.  This problem might have
>> been fixed already.  Can you test a more recent kernel?
>
>I can add some more info since I originally looked at the problem.  This
>happens on 2.6.38 as well, and on this 2.6.32 kernel with a backported
>3.7.0 bonding driver (with the primary_reselect option).  Harsha has a
>prototype patch that's being tested, but wanted to log the bug to see
>if one of the bonding maintainers had a better solution.
>
>I'll let him respond as I'm now out of the loop...
>
>Thanks,
>
>-Brian

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply

* Re: [PATCH] don't allow CAP_NET_ADMIN to load non-netdev kernel modules
From: David Miller @ 2011-02-25 19:05 UTC (permalink / raw)
  To: segoon
  Cc: netdev, linux-kernel, kuznet, pekkas, jmorris, yoshfuji, kaber,
	eric.dumazet, therbert, xiaosuo, jesse, kees.cook, eugene,
	dan.j.rosenberg, akpm
In-Reply-To: <20110225190205.GA4541@albatros>

From: Vasiliy Kulikov <segoon@openwall.com>
Date: Fri, 25 Feb 2011 22:02:05 +0300

> On Fri, Feb 25, 2011 at 10:47 -0800, David Miller wrote:
>> From: Vasiliy Kulikov <segoon@openwall.com>
>> Date: Fri, 25 Feb 2011 18:14:14 +0300
>> 
>> > Since a8f80e8ff94ecba629542d9b4b5f5a8ee3eb565c any process with
>> > CAP_NET_ADMIN may load any module from /lib/modules/.  This doesn't mean
>> > that CAP_NET_ADMIN is a superset of CAP_SYS_MODULE as modules are limited
>> > to /lib/modules/**.  However, CAP_NET_ADMIN capability shouldn't allow
>> > anybody load any module not related to networking.
>> 
>> Why go through this naming change, which does break things, instead of
>> simply adding a capability mask tag or similar to modules somehow.  You
>> could stick it into a special elf section or similar.
>>
>> Doesn't that make tons more sense than this?
> 
> This is not "simply", adding special section for a single workaround
> seems like an overkill for me - this touches the core (modules'
> internals), which is not related to the initial CAP_* problem at all.
> 
> I'd be happy with not breaking anything, but I don't see any acceptable
> solution.

I think it's warranted given that it allows us to avoid breaking things.

I don't understand why there is resistance in response to the first idea
I've seen proposed that actually allows us to fix the problem and not
break anything at the same time.

That seems silly.

^ permalink raw reply

* Re: [PATCH] don't allow CAP_NET_ADMIN to load non-netdev kernel modules
From: Ben Hutchings @ 2011-02-25 19:07 UTC (permalink / raw)
  To: David Miller
  Cc: segoon, netdev, linux-kernel, kuznet, pekkas, jmorris, yoshfuji,
	kaber, eric.dumazet, therbert, xiaosuo, jesse, kees.cook, eugene,
	dan.j.rosenberg, akpm
In-Reply-To: <20110225.110529.39178636.davem@davemloft.net>

On Fri, 2011-02-25 at 11:05 -0800, David Miller wrote:
> From: Vasiliy Kulikov <segoon@openwall.com>
> Date: Fri, 25 Feb 2011 22:02:05 +0300
> 
> > On Fri, Feb 25, 2011 at 10:47 -0800, David Miller wrote:
> >> From: Vasiliy Kulikov <segoon@openwall.com>
> >> Date: Fri, 25 Feb 2011 18:14:14 +0300
> >> 
> >> > Since a8f80e8ff94ecba629542d9b4b5f5a8ee3eb565c any process with
> >> > CAP_NET_ADMIN may load any module from /lib/modules/.  This doesn't mean
> >> > that CAP_NET_ADMIN is a superset of CAP_SYS_MODULE as modules are limited
> >> > to /lib/modules/**.  However, CAP_NET_ADMIN capability shouldn't allow
> >> > anybody load any module not related to networking.
> >> 
> >> Why go through this naming change, which does break things, instead of
> >> simply adding a capability mask tag or similar to modules somehow.  You
> >> could stick it into a special elf section or similar.
> >>
> >> Doesn't that make tons more sense than this?
> > 
> > This is not "simply", adding special section for a single workaround
> > seems like an overkill for me - this touches the core (modules'
> > internals), which is not related to the initial CAP_* problem at all.
> > 
> > I'd be happy with not breaking anything, but I don't see any acceptable
> > solution.
> 
> I think it's warranted given that it allows us to avoid breaking things.
> 
> I don't understand why there is resistance in response to the first idea
> I've seen proposed that actually allows us to fix the problem and not
> break anything at the same time.
> 
> That seems silly.

You realise that module loading doesn't actually run in the context of
request_module(), right?

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: pull request: wireless-next-2.6 2011-02-22
From: David Miller @ 2011-02-25 19:15 UTC (permalink / raw)
  To: linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	linux-bluetooth-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, padovan-Y3ZbgMPKUGA34EUeqzHoZw
In-Reply-To: <20110224.224344.104068328.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>

From: David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
Date: Thu, 24 Feb 2011 22:43:44 -0800 (PST)

> From: "John W. Linville" <linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
> Date: Tue, 22 Feb 2011 16:52:30 -0500
> 
>> Here is the latest batch of wireless bits intended for 2.6.39.  It seems
>> I neglected to send a pull request last week, so this one is a bit big
>> -- I apologize!
>> 
>> This includes a rather large batch of bluetooth bits by way of Gustavo.
>> It looks like a variety of bits, including some code refactoring, some
>> protocol support enhancements, some bugfixes, etc. -- nothing too
>> unusual.
>> 
>> Other items of interest include a new driver from Realtek, some ssb
>> support enhancements, and the usual sort of updates for mac80211 and a
>> variety of drivers.  Also included is a wireless-2.6 pull to resolve
>> some build breakage.
>> 
>> Please let me know if there are problems!
> 
> Pulled, thanks a lot John.

John a few things:

1) I had to add some vmalloc.h includes to fix the build on sparc64,
   see commit b08cd667c4b6641c4d16a3f87f4550f81a6d69ac in net-next-2.6

2) Something is screwy with the bluetooth config options now.

   I have an allmodconfig tree, and when I run "make oldconfig" after
   this pull, BT_L2CAP and BT_SCO both prompt me, claiming that they
   can only be built statically.

   I give it 'y' just to make it happen, for both, and afterwards no
   matter how many times I rerun "make oldconfig" I keep seeing things
   like this in my build:

scripts/kconfig/conf --silentoldconfig Kconfig
include/config/auto.conf:986:warning: symbol value 'm' invalid for BT_SCO
include/config/auto.conf:3156:warning: symbol value 'm' invalid for BT_L2CAP

   First, what the heck is going on here?  Second, why the heck can't these
   non-trivial pieces of code be built modular any more?

   You can't make something "bool", have it depend on something that
   might be modular, and then build it into what could in fact be a
   module.  That's exactly what the bluetooth stuff seems to be doing
   now.

   I suspect commit 642745184f82688eb3ef0cdfaa4ba632055be9af

Thanks.

^ permalink raw reply

* Re: [PATCH] don't allow CAP_NET_ADMIN to load non-netdev kernel modules
From: David Miller @ 2011-02-25 19:16 UTC (permalink / raw)
  To: bhutchings
  Cc: segoon, netdev, linux-kernel, kuznet, pekkas, jmorris, yoshfuji,
	kaber, eric.dumazet, therbert, xiaosuo, jesse, kees.cook, eugene,
	dan.j.rosenberg, akpm
In-Reply-To: <1298660879.2554.23.camel@bwh-desktop>

From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 25 Feb 2011 19:07:59 +0000

> You realise that module loading doesn't actually run in the context of
> request_module(), right?

Why is that a barrier?  We could simply pass a capability mask into
request_module if necessary.

It's an implementation detail, and not a deterrent to my suggested
scheme.

^ permalink raw reply

* Re: SO_REUSEPORT - can it be done in kernel?
From: Rick Jones @ 2011-02-25 19:18 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Tom Herbert, Bill Sommerfeld, Daniel Baluta, netdev
In-Reply-To: <20110225125644.GA9763@canuck.infradead.org>

On Fri, 2011-02-25 at 07:56 -0500, Thomas Graf wrote:
> On Thu, Jan 27, 2011 at 01:32:25PM -0800, Tom Herbert wrote:
> > Yes, we are still planning this.  The UDP implementation for my
> > earlier patch should be usable to try for DNS/UDP-- this is in fact
> > where we saw a major performance gain.  Eric Dumazet had some nice
> > improvements that should probably be looked at also.
> 
> I can confirm this.
> 
> Serious scalability issues have been reported on a 12 core system
> running bind 9.7-2. The system was only able to deliver ~110K queries
> per second.
> 
> Using your SO_REUSEPORT patch and a modified bind that uses it, the same
> system is able to deliver ~650K queries per second while maxing out
> all cores completely.

I think the idea is goodness, but will ask, was the (first) bottleneck
actually in the kernel, or was it in bind itself?  I've seen
single-instance, single-byte burst-mode netperf TCP_RR do in excess of
300K transactions per second (with TCP_NODELAY set) on an X5560 core.

ftp://ftp.netperf.org/netperf/misc/dl380g6_X5560_rhel54_ad386_cxgb3_1.4.1.2_b2b_to_same_agg_1500mtu_20100513-2.csv

and that was with now ancient RHEL5.4 bits...  yes, there is a bit of
apples, oranges and kumquats but still, I am wondering if this didn't
also "work around" some internal BIND scaling issues as well.

rick jones

> 
> Tom, Bill: do you have a timeline for merging this? Especially the
> UDP bits?
> 
> -Thomas
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply

* Re: SO_REUSEPORT - can it be done in kernel?
From: David Miller @ 2011-02-25 19:20 UTC (permalink / raw)
  To: rick.jones2; +Cc: tgraf, therbert, wsommerfeld, daniel.baluta, netdev
In-Reply-To: <1298661495.14113.152.camel@tardy>

From: Rick Jones <rick.jones2@hp.com>
Date: Fri, 25 Feb 2011 11:18:15 -0800

> and that was with now ancient RHEL5.4 bits...  yes, there is a bit of
> apples, oranges and kumquats but still, I am wondering if this didn't
> also "work around" some internal BIND scaling issues as well.

I think this is fundamentally a bind problem as well.

^ permalink raw reply

* Re: [PATCH net-next 0/6] Phonet: small pipe protocol fixes
From: David Miller @ 2011-02-25 19:20 UTC (permalink / raw)
  To: netdev, remi.denis-courmont; +Cc: ofono
In-Reply-To: <201102251113.41620.remi.denis-courmont@nokia.com>

From: "Rémi Denis-Courmont" <remi.denis-courmont@nokia.com>
Date: Fri, 25 Feb 2011 11:13:41 +0200

> This patch series cleans up and fixes a number of small bits in the
> Phonet pipe code, especially the experimental pipe controller. Once
> these small bits are sorted out, I will try to fix the controller
> protocol implementation proper so that we do not need the
> compile-time (experimental) flag anymore.

All applied thanks.

If you want to start using GIT to push phonet changes to me, frankly I
would welcome that :-)

^ permalink raw reply

* Re: SO_REUSEPORT - can it be done in kernel?
From: Eric Dumazet @ 2011-02-25 19:21 UTC (permalink / raw)
  To: rick.jones2
  Cc: Thomas Graf, Tom Herbert, Bill Sommerfeld, Daniel Baluta, netdev
In-Reply-To: <1298661495.14113.152.camel@tardy>

Le vendredi 25 février 2011 à 11:18 -0800, Rick Jones a écrit :

> I think the idea is goodness, but will ask, was the (first) bottleneck
> actually in the kernel, or was it in bind itself?  I've seen
> single-instance, single-byte burst-mode netperf TCP_RR do in excess of
> 300K transactions per second (with TCP_NODELAY set) on an X5560 core.
> 
> ftp://ftp.netperf.org/netperf/misc/dl380g6_X5560_rhel54_ad386_cxgb3_1.4.1.2_b2b_to_same_agg_1500mtu_20100513-2.csv
> 
> and that was with now ancient RHEL5.4 bits...  yes, there is a bit of
> apples, oranges and kumquats but still, I am wondering if this didn't
> also "work around" some internal BIND scaling issues as well.
> 

A single core can probably give 300K transactions.

But if you use several cores, accessing a single socket (the one bound
on port 53), then performance drops because of false sharing,
locking....




^ permalink raw reply

* Re: [PATCH net-next 0/6] Phonet: small pipe protocol fixes
From: David Miller @ 2011-02-25 19:24 UTC (permalink / raw)
  To: netdev, remi.denis-courmont; +Cc: ofono
In-Reply-To: <20110225.112055.260096987.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Fri, 25 Feb 2011 11:20:55 -0800 (PST)

> From: "Rémi Denis-Courmont" <remi.denis-courmont@nokia.com>
> Date: Fri, 25 Feb 2011 11:13:41 +0200
> 
>> This patch series cleans up and fixes a number of small bits in the
>> Phonet pipe code, especially the experimental pipe controller. Once
>> these small bits are sorted out, I will try to fix the controller
>> protocol implementation proper so that we do not need the
>> compile-time (experimental) flag anymore.
> 
> All applied thanks.
> 
> If you want to start using GIT to push phonet changes to me, frankly I
> would welcome that :-)

BTW, I had to add the following patch to fix a build warning:

--------------------
phonet: Protect pipe_do_remove() with appropriate ifdefs.

It is only used when CONFIG_PHONET_PIPECTRLR is not set.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pep.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index b8c31fc..875e86c 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -849,6 +849,7 @@ drop:
 	return err;
 }
 
+#ifndef CONFIG_PHONET_PIPECTRLR
 static int pipe_do_remove(struct sock *sk)
 {
 	struct pep_sock *pn = pep_sk(sk);
@@ -870,6 +871,7 @@ static int pipe_do_remove(struct sock *sk)
 
 	return pn_skb_send(sk, skb, NULL);
 }
+#endif
 
 /* associated socket ceases to exist */
 static void pep_sock_close(struct sock *sk, long timeout)
-- 
1.7.4.1


^ permalink raw reply related

* Re: [PATCH] don't allow CAP_NET_ADMIN to load non-netdev kernel modules
From: Ben Hutchings @ 2011-02-25 19:30 UTC (permalink / raw)
  To: David Miller
  Cc: segoon, netdev, linux-kernel, kuznet, pekkas, jmorris, yoshfuji,
	kaber, eric.dumazet, therbert, xiaosuo, jesse, kees.cook, eugene,
	dan.j.rosenberg, akpm
In-Reply-To: <20110225.111606.115927805.davem@davemloft.net>

On Fri, 2011-02-25 at 11:16 -0800, David Miller wrote:
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Fri, 25 Feb 2011 19:07:59 +0000
> 
> > You realise that module loading doesn't actually run in the context of
> > request_module(), right?
> 
> Why is that a barrier?  We could simply pass a capability mask into
> request_module if necessary.
> 
> It's an implementation detail, and not a deterrent to my suggested
> scheme.

It's not an implementation detail.  modprobe currently runs with full
capabilities; your proposal requires its capabilities to be limited to
those of the capabilities of the process that triggered the
request_module() (plus, presumably, CAP_SYS_MODULE).

Now modprobe doesn't have CAP_DAC_OVERRIDE and can't read modprobe
configuration files that belong to users other than root.

It doesn't have CAP_SYS_MKNOD so it can't run hooks that call mknod.

etc.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [RFC] be2net: add rxhash support
From: Eric Dumazet @ 2011-02-25 19:32 UTC (permalink / raw)
  To: Ajit Khaparde; +Cc: netdev
In-Reply-To: <1298658096.2659.101.camel@edumazet-laptop>

Le vendredi 25 février 2011 à 19:21 +0100, Eric Dumazet a écrit :
> Le vendredi 25 février 2011 à 11:44 -0600, Ajit Khaparde a écrit :
> > -----
> > [PATCH net-next] be2net: add rxhash support
> > 
> > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> > Signed-off-by: Ajit Khaparde <ajit.khaparde@emulex.com>
> > ---
> >  drivers/net/benet/be_main.c |   11 +++++++++++
> >  1 files changed, 11 insertions(+), 0 deletions(-)
> > 
> > diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
> > index 26f9c56..8c4b782 100644
> > --- a/drivers/net/benet/be_main.c
> > +++ b/drivers/net/benet/be_main.c
> > @@ -1038,6 +1038,10 @@ static void be_rx_compl_process(struct be_adapter *adapter,
> >  
> >  	skb->truesize = skb->len + sizeof(struct sk_buff);
> >  	skb->protocol = eth_type_trans(skb, adapter->netdev);
> > +	if (adapter->netdev->features & NETIF_F_RXHASH)
> > +		skb->rxhash = AMAP_GET_BITS(struct amap_eth_rx_compl,
> > +					rsshash, rxcp);
> > +
> >  
> >  	vlanf = AMAP_GET_BITS(struct amap_eth_rx_compl, vtp, rxcp);
> >  	vtm = AMAP_GET_BITS(struct amap_eth_rx_compl, vtm, rxcp);
> > @@ -1099,6 +1103,10 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter,
> >  		return;
> >  	}
> >  
> > +	if (adapter->netdev->features & NETIF_F_RXHASH)
> > +		skb->rxhash = AMAP_GET_BITS(struct amap_eth_rx_compl,
> > +						rsshash, rxcp);
> > +
> >  	remaining = pkt_size;
> >  	for (i = 0, j = -1; i < num_rcvd; i++) {
> >  		page_info = get_rx_page_info(adapter, rxo, rxq_idx);
> > @@ -2619,6 +2627,9 @@ static void be_netdev_init(struct net_device *netdev)
> >  		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
> >  		NETIF_F_GRO | NETIF_F_TSO6;
> >  
> > +	if (be_multi_rxq(adapter))
> > +		netdev->features |= NETIF_F_RXHASH;
> > +
> >  	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO |
> >  		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
> >  
> 
> 
> I added some traces, and I am not sure it's OK:
> 
> With one active tcp flow, I got different rxhash values :
> 
> [ 1064.674253] rxhash=bbd37952 rsshp=1 bank=1
> [ 1064.738104] rxhash=37acd31d rsshp=1 bank=1
> [ 1064.741684] rxhash=bbd37952 rsshp=1 bank=1
> [ 1064.874283] rxhash=bbd37952 rsshp=1 bank=1
> [ 1064.940201] rxhash=bbd37952 rsshp=1 bank=1
> [ 1064.955278] rxhash=b668ace2 rsshp=1 bank=1
> [ 1065.080028] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.153360] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.293164] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.401862] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.460506] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.519980] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.650160] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.717585] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.730909] rxhash=37acd31d rsshp=1 bank=1
> [ 1065.840350] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.900704] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.931526] rxhash=b668ace2 rsshp=1 bank=1
> [ 1066.503657] rxhash=bbd37952 rsshp=1 bank=1
> [ 1066.570138] rxhash=bbd37952 rsshp=1 bank=1
> 
> How is it possible ?
> 
> (I have a VLAN config on top of a bonding)
> 
> 

Also, Ajit, we need something to allow ethtool -K rxhash {on|off}

Something like (completely untested)

diff --git a/drivers/net/benet/be_ethtool.c b/drivers/net/benet/be_ethtool.c
index 6e5e433..540c1ea 100644
--- a/drivers/net/benet/be_ethtool.c
+++ b/drivers/net/benet/be_ethtool.c
@@ -712,6 +712,11 @@ be_read_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
 	return status;
 }
 
+static int be_set_flags(struct net_device *dev, u32 data)
+{
+	return ethtool_op_set_flags(dev, data, ETH_FLAG_RXHASH);
+}
+
 const struct ethtool_ops be_ethtool_ops = {
 	.get_settings = be_get_settings,
 	.get_drvinfo = be_get_drvinfo,
@@ -739,4 +744,5 @@ const struct ethtool_ops be_ethtool_ops = {
 	.get_ethtool_stats = be_get_ethtool_stats,
 	.flash_device = be_do_flash,
 	.self_test = be_self_test,
+	.set_flags = be_set_flags,
 };



^ permalink raw reply related

* Re: pull request: wireless-next-2.6 2011-02-22
From: Gustavo F. Padovan @ 2011-02-25 19:36 UTC (permalink / raw)
  To: David Miller
  Cc: linville-2XuSBdqkA4R54TAoqtyWWQ,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	linux-bluetooth-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20110225.111500.59674472.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>

Hi David,

* David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> [2011-02-25 11:15:00 -0800]:

> From: David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
> Date: Thu, 24 Feb 2011 22:43:44 -0800 (PST)
> 
> > From: "John W. Linville" <linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
> > Date: Tue, 22 Feb 2011 16:52:30 -0500
> > 
> >> Here is the latest batch of wireless bits intended for 2.6.39.  It seems
> >> I neglected to send a pull request last week, so this one is a bit big
> >> -- I apologize!
> >> 
> >> This includes a rather large batch of bluetooth bits by way of Gustavo.
> >> It looks like a variety of bits, including some code refactoring, some
> >> protocol support enhancements, some bugfixes, etc. -- nothing too
> >> unusual.
> >> 
> >> Other items of interest include a new driver from Realtek, some ssb
> >> support enhancements, and the usual sort of updates for mac80211 and a
> >> variety of drivers.  Also included is a wireless-2.6 pull to resolve
> >> some build breakage.
> >> 
> >> Please let me know if there are problems!
> > 
> > Pulled, thanks a lot John.
> 
> John a few things:
> 
> 1) I had to add some vmalloc.h includes to fix the build on sparc64,
>    see commit b08cd667c4b6641c4d16a3f87f4550f81a6d69ac in net-next-2.6
> 
> 2) Something is screwy with the bluetooth config options now.
> 
>    I have an allmodconfig tree, and when I run "make oldconfig" after
>    this pull, BT_L2CAP and BT_SCO both prompt me, claiming that they
>    can only be built statically.
> 
>    I give it 'y' just to make it happen, for both, and afterwards no
>    matter how many times I rerun "make oldconfig" I keep seeing things
>    like this in my build:
> 
> scripts/kconfig/conf --silentoldconfig Kconfig
> include/config/auto.conf:986:warning: symbol value 'm' invalid for BT_SCO
> include/config/auto.conf:3156:warning: symbol value 'm' invalid for BT_L2CAP
> 
>    First, what the heck is going on here?  Second, why the heck can't these
>    non-trivial pieces of code be built modular any more?

We now have L2CAP and SCO built-in in the main bluetooth.ko module.

> 
>    You can't make something "bool", have it depend on something that
>    might be modular, and then build it into what could in fact be a
>    module.  That's exactly what the bluetooth stuff seems to be doing
>    now.

Seems I did the Kconfig change wrong, I'll fix it ASAP and send it to you
guys.

-- 
Gustavo F. Padovan
http://profusion.mobi

^ permalink raw reply

* Re: [RFC] be2net: add rxhash support
From: Ajit Khaparde @ 2011-02-25 19:36 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev

> -----Original Message-----
> From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> Sent: Friday, February 25, 2011 1:33 PM
> To: Khaparde, Ajit
> Cc: netdev@vger.kernel.org
> Subject: Re: [RFC] be2net: add rxhash support

> I added some traces, and I am not sure it's OK :
> 
> With one active tcp flow, I got different rxhash values :
> 
> [ 1064.674253] rxhash=bbd37952 rsshp=1 bank=1
> [ 1064.738104] rxhash=37acd31d rsshp=1 bank=1
> [ 1064.741684] rxhash=bbd37952 rsshp=1 bank=1
> [ 1064.874283] rxhash=bbd37952 rsshp=1 bank=1
> [ 1064.940201] rxhash=bbd37952 rsshp=1 bank=1
> [ 1064.955278] rxhash=b668ace2 rsshp=1 bank=1
> [ 1065.080028] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.153360] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.293164] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.401862] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.460506] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.519980] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.650160] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.717585] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.730909] rxhash=37acd31d rsshp=1 bank=1
> [ 1065.840350] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.900704] rxhash=bbd37952 rsshp=1 bank=1
> [ 1065.931526] rxhash=b668ace2 rsshp=1 bank=1
> [ 1066.503657] rxhash=bbd37952 rsshp=1 bank=1
> [ 1066.570138] rxhash=bbd37952 rsshp=1 bank=1
> 
> How is it possible ?
> 
> (I have a VLAN config on top of a bonding)
> 
I'm looking at this.
There is no switch involved in your test, just back to back?

> 
> Also, Ajit, we need something to allow ethtool -K rxhash {on|off}
> 
> Something like (completely untested)

Yes. That is in the works.

Thanks
-Ajit

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox