Netdev List
 help / color / mirror / Atom feed
* [PATCH 2/9] sysctl: use ctl_header_cookie in proc_handler
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

TODO: if this patch series gets positive feedback, this patch will be
extended with a kernel-wide change of each proc_handler to add a
'cookie' argument.

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 fs/proc/proc_sysctl.c  |   11 ++++++++++-
 include/linux/sysctl.h |    3 +++
 2 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92..85b6b75 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -135,6 +135,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct ctl_table_header *head = grab_header(inode);
 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+	proc_handler_cookie *phc = (proc_handler_cookie *) table->proc_handler;
 	ssize_t error;
 	size_t res;
 
@@ -156,7 +157,15 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 
 	/* careful: calling conventions are nasty here */
 	res = count;
-	error = table->proc_handler(table, write, buf, &res, ppos);
+	/*XXX Most handlers only use the first 5 arguments (without
+	 *XXX @cookie). Changing all handlers is too much of work,
+	 *XXX as this is only a RFC patch at the moment.
+	 *XXX
+	 *XXX This is just a HACK for now, I did this this way to not
+	 *XXX waste time changing all the handlers, in the final version
+	 *XXX I'll change all the handlers if there's not other solution.
+	 */
+	error = phc(table, write, buf, &res, ppos, head->ctl_header_cookie);
 	if (!error)
 		error = res;
 out:
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 43fed29..3d21832 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -963,6 +963,9 @@ typedef struct ctl_table ctl_table;
 
 typedef int proc_handler (struct ctl_table *ctl, int write,
 			  void __user *buffer, size_t *lenp, loff_t *ppos);
+typedef int proc_handler_cookie(struct ctl_table *ctl, int write,
+				void __user *buffer, size_t *lenp,
+				loff_t *ppos, void *ctl_header_cookie);
 
 extern int proc_dostring(struct ctl_table *, int,
 			 void __user *, size_t *, loff_t *);
-- 
1.7.4.rc1.7.g2cf08.dirty

^ permalink raw reply related

* [PATCH 1/9] sysctl: add ctl_header_cookie
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev
  Cc: Lucian Adrian Grijincu
In-Reply-To: <1298659961-23863-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 include/linux/sysctl.h |    5 ++++-
 kernel/sysctl.c        |   12 ++++++++----
 net/sysctl_net.c       |    6 +++---
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7bb5cb6..43fed29 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -1048,6 +1048,9 @@ struct ctl_table_header
 	struct ctl_table *attached_by;
 	struct ctl_table *attached_to;
 	struct ctl_table_header *parent;
+	/* Pointer to data that outlives this ctl_table_header.
+	 * Caller responsible to free the cookie. */
+	void *ctl_header_cookie;
 };
 
 /* struct ctl_path describes where in the hierarchy a table is added */
@@ -1058,7 +1061,7 @@ struct ctl_path {
 void register_sysctl_root(struct ctl_table_root *root);
 struct ctl_table_header *__register_sysctl_paths(
 	struct ctl_table_root *root, struct nsproxy *namespaces,
-	const struct ctl_path *path, struct ctl_table *table);
+	const struct ctl_path *path, struct ctl_table *table, void *cookie);
 struct ctl_table_header *register_sysctl_table(struct ctl_table * table);
 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
 						struct ctl_table *table);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f1bd83..31fd587 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -199,6 +199,7 @@ static struct ctl_table_header root_table_header = {
 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
 	.root = &sysctl_table_root,
 	.set = &sysctl_table_root.default_set,
+	.ctl_header_cookie = NULL,
 };
 static struct ctl_table_root sysctl_table_root = {
 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
@@ -1774,6 +1775,9 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
  * @namespaces: Data to compute which lists of sysctl entries are visible
  * @path: The path to the directory the sysctl table is in.
  * @table: the top-level table structure
+ * @cookie: Pointer to user provided data that must be accessible
+ *  until unregister_sysctl_table. This cookie will be passed to the
+ *  proc_handler.
  *
  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
  * array. A completely 0 filled entry terminates the table.
@@ -1822,9 +1826,8 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
  * to the table header on success.
  */
 struct ctl_table_header *__register_sysctl_paths(
-	struct ctl_table_root *root,
-	struct nsproxy *namespaces,
-	const struct ctl_path *path, struct ctl_table *table)
+	struct ctl_table_root *root, struct nsproxy *namespaces,
+	const struct ctl_path *path, struct ctl_table *table, void *cookie)
 {
 	struct ctl_table_header *header;
 	struct ctl_table *new, **prevp;
@@ -1871,6 +1874,7 @@ struct ctl_table_header *__register_sysctl_paths(
 	header->root = root;
 	sysctl_set_parent(NULL, header->ctl_table);
 	header->count = 1;
+	header->ctl_header_cookie = cookie;
 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
 	if (sysctl_check_table(namespaces, header->ctl_table)) {
 		kfree(header);
@@ -1911,7 +1915,7 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
 						struct ctl_table *table)
 {
 	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
-					path, table);
+				       path, table, NULL);
 }
 
 /**
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index ca84212..9dadd17 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -109,8 +109,8 @@ struct ctl_table_header *register_net_sysctl_table(struct net *net,
 	struct nsproxy namespaces;
 	namespaces = *current->nsproxy;
 	namespaces.net_ns = net;
-	return __register_sysctl_paths(&net_sysctl_root,
-					&namespaces, path, table);
+	return __register_sysctl_paths(&net_sysctl_root, &namespaces, path,
+				       table, NULL);
 }
 EXPORT_SYMBOL_GPL(register_net_sysctl_table);
 
@@ -118,7 +118,7 @@ struct ctl_table_header *register_net_sysctl_rotable(const
 		struct ctl_path *path, struct ctl_table *table)
 {
 	return __register_sysctl_paths(&net_sysctl_ro_root,
-			&init_nsproxy, path, table);
+				       &init_nsproxy, path, table, NULL);
 }
 EXPORT_SYMBOL_GPL(register_net_sysctl_rotable);
 
-- 
1.7.4.rc1.7.g2cf08.dirty

^ permalink raw reply related

* RFC v1: sysctl: add sysctl header cookie, share tables between nets
From: Lucian Adrian Grijincu @ 2011-02-25 18:52 UTC (permalink / raw)
  To: David S. Miller, Alexey Dobriyan, Eric W. Biederman,
	Octavian Purdila, netdev

This is a new approach to the "share sysctl tables" RFC series I
posted earlier this month.

In previous patches I proposed deriving 'struct net*' from the parent
ctl_entry's ->extra1 field, but that has seen opposition due to mixing
in information from the dentry cache/fs layers.

In this version, the ctl_table_header is extended to hold a cookie at
creation time and pass it to the handlers. By default every
ctl_table_header that is netns specific will store the 'struct net*'
in the cookie.

I could go on with the patch series and share other ctl_tables between
network namespaces in the same manner, but I stopped here to not waste
time on a solution that you do not consider applying for reasons I
don't see now.

If you like this, I'll post a full patch series:
* change proc_handler to accept a cookie
* change all proc_handler functions in the kernel to accept a cookie
* apply sysctl table sharing to other tables. Candidates would be:
  nf_conntrack_acct_init_sysctl, nf_conntrack_standalone_init_sysctl,
  unix_sysctl_register, but there may be others I'm not seeing now.

This series is against Linus's 2.6.38-rc6 (plus a few other patches).


 fs/proc/proc_sysctl.c       |   11 +++++++-
 include/linux/sysctl.h      |    8 +++++-
 include/net/ipv6.h          |    6 +---
 include/net/net_namespace.h |   26 ++++++++++++++++++
 kernel/sysctl.c             |   12 +++++---
 net/core/sysctl_net_core.c  |   28 ++-----------------
 net/ipv4/ip_fragment.c      |   34 ++++-------------------
 net/ipv4/route.c            |   36 +++++--------------------
 net/ipv4/sysctl_net_ipv4.c  |   53 ++++++-------------------------------
 net/ipv6/icmp.c             |   17 +----------
 net/ipv6/reassembly.c       |   34 ++++-------------------
 net/ipv6/route.c            |   54 ++++++++++---------------------------
 net/ipv6/sysctl_net_ipv6.c  |   61 +++++-------------------------------------
 net/sysctl_net.c            |   37 ++++++++++++++++++++++++--
 14 files changed, 143 insertions(+), 274 deletions(-)


 * [PATCH 1/9] sysctl: add ctl_header_cookie
 * [PATCH 2/9] sysctl: use ctl_header_cookie in proc_handler
 * [PATCH 3/9] sysctl: add netns_proc_dointvec and similar handlers
 * [PATCH 4/9] sysctl: ipv4: ipfrag: share ip4_frags_ns_ctl_table between nets
 * [PATCH 5/9] sysctl: net: share netns_core_table between nets
 * [PATCH 6/9] sysctl: route: share ipv4_route_flush_table between nets
 * [PATCH 7/9] sysctl: ipv4: share ipv4_net_table between nets
 * [PATCH 8/9] sysctl: ipv6: share ip6_frags_ns_ctl_table between nets
 * [PATCH 9/9] sysctl: ipv6: share ip6_ctl_table, ipv6_icmp_table and ipv6_route_table between nets

^ permalink raw reply

* Re: [PATCH ref0] net: add Faraday FTMAC100 10/100 Ethernet driver
From: Eric Dumazet @ 2011-02-25 18:47 UTC (permalink / raw)
  To: David Miller
  Cc: ratbert.chuang, netdev, linux-kernel, bhutchings, joe, dilinger,
	mirqus, ratbert, Ajit Khaparde
In-Reply-To: <1298659538.2659.103.camel@edumazet-laptop>

Le vendredi 25 février 2011 à 19:45 +0100, Eric Dumazet a écrit :
> Le vendredi 25 février 2011 à 10:34 -0800, David Miller a écrit :
> > From: Eric Dumazet <eric.dumazet@gmail.com>
> > Date: Fri, 25 Feb 2011 11:52:07 +0100
> > 
> > > Le vendredi 25 février 2011 à 17:45 +0800, Po-Yu Chuang a écrit :
> > > 
> > >> It's a little faster than v5 now. Thanks.
> > >> I will submit the current version later.
> > >> 
> > >> One more question just curious, why 128 bytes?
> > > 
> > > Probably its best for NIU hardware specs
> > > 
> > > You could try 64, as it should be enough for most IP/TCP/UDP processing.
> > 
> > IPV6.
> 
> drivers/net/benet/be.h:70:#define BE_HDR_LEN            64
> 
> Maybe we should have a comment somewhere.
> 
> CC Ajit Khaparde <ajit.khaparde@emulex.com>
> 


A compromise would be to use 128 for the allocation, but only copy 64
bytes.

^ permalink raw reply

* Re: [PATCH] don't allow CAP_NET_ADMIN to load non-netdev kernel modules
From: David Miller @ 2011-02-25 18:47 UTC (permalink / raw)
  To: segoon
  Cc: netdev, linux-kernel, kuznet, pekkas, jmorris, yoshfuji, kaber,
	eric.dumazet, therbert, xiaosuo, jesse
In-Reply-To: <20110225151414.GA5211@albatros>

From: Vasiliy Kulikov <segoon@openwall.com>
Date: Fri, 25 Feb 2011 18:14:14 +0300

> Since a8f80e8ff94ecba629542d9b4b5f5a8ee3eb565c any process with
> CAP_NET_ADMIN may load any module from /lib/modules/.  This doesn't mean
> that CAP_NET_ADMIN is a superset of CAP_SYS_MODULE as modules are limited
> to /lib/modules/**.  However, CAP_NET_ADMIN capability shouldn't allow
> anybody load any module not related to networking.

Why go through this naming change, which does break things, instead of
simply adding a capability mask tag or similar to modules somehow?  You
could stick it into a special ELF section or similar.

Doesn't that make tons more sense than this?

^ permalink raw reply

* Re: [PATCH ref0] net: add Faraday FTMAC100 10/100 Ethernet driver
From: Eric Dumazet @ 2011-02-25 18:45 UTC (permalink / raw)
  To: David Miller
  Cc: ratbert.chuang, netdev, linux-kernel, bhutchings, joe, dilinger,
	mirqus, ratbert, Ajit Khaparde
In-Reply-To: <20110225.103456.226779149.davem@davemloft.net>

Le vendredi 25 février 2011 à 10:34 -0800, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 25 Feb 2011 11:52:07 +0100
> 
> > Le vendredi 25 février 2011 à 17:45 +0800, Po-Yu Chuang a écrit :
> > 
> >> It's a little faster than v5 now. Thanks.
> >> I will submit the current version later.
> >> 
> >> One more question just curious, why 128 bytes?
> > 
> > Probably its best for NIU hardware specs
> > 
> > You could try 64, as it should be enough for most IP/TCP/UDP processing.
> 
> IPV6.

drivers/net/benet/be.h:70:#define BE_HDR_LEN            64

Maybe we should have a comment somewhere.

CC Ajit Khaparde <ajit.khaparde@emulex.com>

^ permalink raw reply

* Re: via-rhine -- VT6105M and checksum offloading
From: Jan Ceuleers @ 2011-02-25 18:35 UTC (permalink / raw)
  To: Roger Luethi; +Cc: Benjamin LaHaise, David Miller, netdev
In-Reply-To: <20110225075303.GA8748@core.hellgate.ch>

On 25/02/11 08:53, Roger Luethi wrote:
> I have a patch to enable hw checksumming (the ethtool hooks are done, but I
> somehow missed the NETIF_F_GRO bit). Care to give it a whirl?

Can you post that, preferably rebased to net-next? Even if Benjamin 
doesn't get 'round to implementing all of the improvements Dave proposes 
perhaps Dave will be clement enough to apply it as-is if it proves to be 
a net positive?

Thanks, Jan

^ permalink raw reply

* Re: [PATCH ref0] net: add Faraday FTMAC100 10/100 Ethernet driver
From: David Miller @ 2011-02-25 18:34 UTC (permalink / raw)
  To: eric.dumazet
  Cc: ratbert.chuang, netdev, linux-kernel, bhutchings, joe, dilinger,
	mirqus, ratbert
In-Reply-To: <1298631127.2659.22.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 25 Feb 2011 11:52:07 +0100

> Le vendredi 25 février 2011 à 17:45 +0800, Po-Yu Chuang a écrit :
> 
>> It's a little faster than v5 now. Thanks.
>> I will submit the current version later.
>> 
>> One more question just curious, why 128 bytes?
> 
> Probably its best for NIU hardware specs
> 
> You could try 64, as it should be enough for most IP/TCP/UDP processing.

IPV6.

^ permalink raw reply

* Re: [RFC] be2net: add rxhash support
From: Eric Dumazet @ 2011-02-25 18:21 UTC (permalink / raw)
  To: Ajit Khaparde; +Cc: netdev
In-Reply-To: <20110225174425.GA11203@akhaparde-VBox>

Le vendredi 25 février 2011 à 11:44 -0600, Ajit Khaparde a écrit :
> > -----Original Message-----
> > From: Eric Dumazet <eric.dumazet@gmail.com>
> > Sent: Thursday, February 24, 2011 2:25 PM
> > To: Khaparde, Ajit
> > Cc: netdev@vger.kernel.org
> > Subject: [RFC] be2net: add rxhash support
> > 
> > Ajit, it seems be2net provides RSS hash value in rx compl descriptor ?
> > 
> > Could we feed skb->rxhash with it ?
> > 
> > Thanks !
> Thanks Eric. Sure.
> This is a long pending change which fell through the cracks.
> But then because hashing is enabled in the device only when
> Number of Rx Queues is > 1, I would suggest the following patch.
> 
> Unaware of exact conventions, I have added signed-off-by to the patch already.
> 
> Thanks
> 
> -----
> [PATCH net-next] be2net: add rxhash support
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Ajit Khaparde <ajit.khaparde@emulex.com>
> ---
>  drivers/net/benet/be_main.c |   11 +++++++++++
>  1 files changed, 11 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
> index 26f9c56..8c4b782 100644
> --- a/drivers/net/benet/be_main.c
> +++ b/drivers/net/benet/be_main.c
> @@ -1038,6 +1038,10 @@ static void be_rx_compl_process(struct be_adapter *adapter,
>  
>  	skb->truesize = skb->len + sizeof(struct sk_buff);
>  	skb->protocol = eth_type_trans(skb, adapter->netdev);
> +	if (adapter->netdev->features & NETIF_F_RXHASH)
> +		skb->rxhash = AMAP_GET_BITS(struct amap_eth_rx_compl,
> +					rsshash, rxcp);
> +
>  
>  	vlanf = AMAP_GET_BITS(struct amap_eth_rx_compl, vtp, rxcp);
>  	vtm = AMAP_GET_BITS(struct amap_eth_rx_compl, vtm, rxcp);
> @@ -1099,6 +1103,10 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter,
>  		return;
>  	}
>  
> +	if (adapter->netdev->features & NETIF_F_RXHASH)
> +		skb->rxhash = AMAP_GET_BITS(struct amap_eth_rx_compl,
> +						rsshash, rxcp);
> +
>  	remaining = pkt_size;
>  	for (i = 0, j = -1; i < num_rcvd; i++) {
>  		page_info = get_rx_page_info(adapter, rxo, rxq_idx);
> @@ -2619,6 +2627,9 @@ static void be_netdev_init(struct net_device *netdev)
>  		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
>  		NETIF_F_GRO | NETIF_F_TSO6;
>  
> +	if (be_multi_rxq(adapter))
> +		netdev->features |= NETIF_F_RXHASH;
> +
>  	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO |
>  		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
>  


I added some traces, and I am not sure it's OK:

With one active tcp flow, I got different rxhash values :

[ 1064.674253] rxhash=bbd37952 rsshp=1 bank=1
[ 1064.738104] rxhash=37acd31d rsshp=1 bank=1
[ 1064.741684] rxhash=bbd37952 rsshp=1 bank=1
[ 1064.874283] rxhash=bbd37952 rsshp=1 bank=1
[ 1064.940201] rxhash=bbd37952 rsshp=1 bank=1
[ 1064.955278] rxhash=b668ace2 rsshp=1 bank=1
[ 1065.080028] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.153360] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.293164] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.401862] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.460506] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.519980] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.650160] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.717585] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.730909] rxhash=37acd31d rsshp=1 bank=1
[ 1065.840350] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.900704] rxhash=bbd37952 rsshp=1 bank=1
[ 1065.931526] rxhash=b668ace2 rsshp=1 bank=1
[ 1066.503657] rxhash=bbd37952 rsshp=1 bank=1
[ 1066.570138] rxhash=bbd37952 rsshp=1 bank=1

How is it possible ?

(I have a VLAN config on top of a bonding)




^ permalink raw reply

* RE: Occasional link flap on Intel 82599 on boot in XAUI mode
From: Skidmore, Donald C @ 2011-02-25 18:11 UTC (permalink / raw)
  To: Brent Cook, netdev@vger.kernel.org
In-Reply-To: <201102251011.42400.bcook@breakingpoint.com>

>-----Original Message-----
>From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
>On Behalf Of Brent Cook
>Sent: Friday, February 25, 2011 8:12 AM
>To: netdev@vger.kernel.org
>Subject: Occasional link flap on Intel 82599 on boot in XAUI mode
>
>We have a custom system with dual 82599's in XAUI mode. One has its pair
>of ports connected to a 10G switch, the other has its pair of ports
>connected to an FPGA.
>
>Occasionally, on any of the interfaces, we will see the links flapping
>up and down when the system initially boots. This happens maybe one in
>20 boots.
>
>Feb 23 14:58:10 mfg kernel: [  594.254977] ixgbe: eth1 NIC Link is Down
>Feb 23 14:58:12 mfg kernel: [  596.230039] ixgbe: eth1 NIC Link is Up 10
>Gbps, Flow Control: RX/TX
>Feb 23 14:58:12 mfg kernel: [  596.256537] ixgbe: eth1 NIC Link is Down
>Feb 23 14:58:16 mfg kernel: [  600.228096] ixgbe: eth1 NIC Link is Up 10
>Gbps, Flow Control: RX/TX
>Feb 23 14:58:16 mfg kernel: [  600.240135] ixgbe: eth1 NIC Link is Down
>Feb 23 14:58:18 mfg kernel: [  602.227047] ixgbe: eth1 NIC Link is Up 10
>Gbps, Flow Control: RX/TX
>
>Simply forcing a down/up on the interface seems to correct the problem:
>
>ip link set eth1 down
>ip link set eth1 up
>
>Is anyone else using XAUI mode and has seen this? Here is the kernel
>information that we are using currently:
>
>Linux sprint.labnet.local 2.6.32.28-bps #1 SMP PREEMPT Mon Jan 31
>16:05:50 CST 2011 x86_64 GNU/Linux
>
>Feb 23 14:48:24 mfg kernel: [    3.488664] Intel(R) PRO/1000 Network
>Driver - version 7.3.21-k5-NAPI
>Feb 23 14:48:24 mfg kernel: [    3.495082] Copyright (c) 1999-2006 Intel
>Corporation.
>Feb 23 14:48:24 mfg kernel: [    3.500238] e1000e: Intel(R) PRO/1000
>Network Driver - 1.0.2-k2
>Feb 23 14:48:24 mfg kernel: [    3.506138] e1000e: Copyright (c) 1999-
>2008 Intel Corporation.
>Feb 23 14:48:24 mfg kernel: [    3.511988] Intel(R) Gigabit Ethernet
>Network Driver - version 1.3.16-k2
>Feb 23 14:48:24 mfg kernel: [    3.518666] Copyright (c) 2007-2009 Intel
>Corporation.
>Feb 23 14:48:24 mfg kernel: [    3.523817] ixgbe: Intel(R) 10 Gigabit
>PCI Express Network Driver - version 2.0.44-k2
>Feb 23 14:48:24 mfg kernel: [    3.531622] ixgbe: Copyright (c) 1999-
>2009 Intel Corporation.
>Feb 23 14:48:24 mfg kernel: [    3.537365] ixgbe 0000:01:00.0: PCI INT A
>-> GSI 24 (level, low) -> IRQ 24
>Feb 23 14:48:24 mfg kernel: [    3.632934] ixgbe: 0000:01:00.0:
>ixgbe_init_interrupt_scheme: Multiqueue Enabled: Rx Queue count = 8, Tx
>Queue count = 8
>Feb 23 14:48:24 mfg kernel: [    3.643769] ixgbe 0000:01:00.0: (PCI
>Express:5.0Gb/s:Width x8) 00:12:34:56:78:67
>Feb 23 14:48:24 mfg kernel: [    3.651221] ixgbe 0000:01:00.0: MAC: 2,
>PHY: 0, PBA No: ffffff-0ff
>Feb 23 14:48:24 mfg kernel: [    3.669390] ixgbe 0000:01:00.0: Intel(R)
>10 Gigabit Network Connection
>Feb 23 14:48:24 mfg kernel: [    3.675907] ixgbe 0000:01:00.1: PCI INT B
>-> GSI 47 (level, low) -> IRQ 47
>Feb 23 14:48:24 mfg kernel: [    3.771863] ixgbe: 0000:01:00.1:
>ixgbe_init_interrupt_scheme: Multiqueue Enabled: Rx Queue count = 8, Tx
>Queue count = 8
>Feb 23 14:48:24 mfg kernel: [    3.782699] ixgbe 0000:01:00.1: (PCI
>Express:5.0Gb/s:Width x8) 00:12:34:56:78:64
>Feb 23 14:48:24 mfg kernel: [    3.790151] ixgbe 0000:01:00.1: MAC: 2,
>PHY: 0, PBA No: ffffff-0ff
>Feb 23 14:48:24 mfg kernel: [    3.808314] ixgbe 0000:01:00.1: Intel(R)
>10 Gigabit Network Connection
>Feb 23 14:48:24 mfg kernel: [    3.814831] ixgbe 0000:02:00.0: PCI INT A
>-> GSI 35 (level, low) -> IRQ 35
>Feb 23 14:48:24 mfg kernel: [    3.910802] ixgbe: 0000:02:00.0:
>ixgbe_init_interrupt_scheme: Multiqueue Enabled: Rx Queue count = 8, Tx
>Queue count = 8
>Feb 23 14:48:24 mfg kernel: [    3.921636] ixgbe 0000:02:00.0: (PCI
>Express:5.0Gb/s:Width x8) 00:12:34:56:78:52
>Feb 23 14:48:24 mfg kernel: [    3.929087] ixgbe 0000:02:00.0: MAC: 2,
>PHY: 0, PBA No: ffffff-0ff
>Feb 23 14:48:24 mfg kernel: [    3.947246] ixgbe 0000:02:00.0: Intel(R)
>10 Gigabit Network Connection
>Feb 23 14:48:24 mfg kernel: [    3.953761] ixgbe 0000:02:00.1: PCI INT B
>-> GSI 36 (level, low) -> IRQ 36
>Feb 23 14:48:24 mfg kernel: [    4.049731] ixgbe: 0000:02:00.1:
>ixgbe_init_interrupt_scheme: Multiqueue Enabled: Rx Queue count = 8, Tx
>Queue count = 8
>Feb 23 14:48:24 mfg kernel: [    4.060566] ixgbe 0000:02:00.1: (PCI
>Express:5.0Gb/s:Width x8) 00:12:34:56:78:53
>Feb 23 14:48:24 mfg kernel: [    4.068018] ixgbe 0000:02:00.1: MAC: 2,
>PHY: 0, PBA No: ffffff-0ff
>Feb 23 14:48:24 mfg kernel: [    4.086179] ixgbe 0000:02:00.1: Intel(R)
>10 Gigabit Network Connection
>--
>To unsubscribe from this list: send the line "unsubscribe netdev" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

By any chance have you tried it with a newer driver?  Latest Source Forge is 3.2.9.

Also, do you only see this link flap on boot, or can you recreate the flap by unloading and reloading the ixgbe module?

Thanks,
-Don Skidmore <donald.c.skidmore@intel.com>

^ permalink raw reply

* Re: [PATCH] don't allow CAP_NET_ADMIN to load non-netdev kernel modules
From: Ben Hutchings @ 2011-02-25 17:48 UTC (permalink / raw)
  To: Valdis.Kletnieks
  Cc: Vasiliy Kulikov, David S. Miller, netdev, linux-kernel,
	Alexey Kuznetsov, Pekka Savola (ipv6), James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, Eric Dumazet, Tom Herbert,
	Changli Gao, Jesse Gross
In-Reply-To: <135187.1298654740@localhost>

On Fri, 2011-02-25 at 12:25 -0500, Valdis.Kletnieks@vt.edu wrote:
> On Fri, 25 Feb 2011 18:14:14 +0300, Vasiliy Kulikov said:
> > Since a8f80e8ff94ecba629542d9b4b5f5a8ee3eb565c any process with
> > CAP_NET_ADMIN may load any module from /lib/modules/.  This doesn't mean
> > that CAP_NET_ADMIN is a superset of CAP_SYS_MODULE as modules are limited
> > to /lib/modules/**.  However, CAP_NET_ADMIN capability shouldn't allow
> > anybody load any module not related to networking.
> > 
> > This patch restricts an ability of autoloading modules to netdev modules
> > with explicit aliases.  Currently there are only three users of the
> > feature: ipip, ip_gre and sit.
> 
> And you stop an attacker from simply recompiling the module with a suitable
> MODULE_ALIAS line added, how, exactly?  This patch may make sense down the
> road, but not while it's still trivial for a malicious root user to drop stuff
> into /lib/modules.

A process running as root normally has CAP_NET_ADMIN, but not every
process with CAP_NET_ADMIN will be running as root.

> And if you're going the route "but SELinux/SMACK/Tomoyo will prevent a malicious
> root user from doing that", then the obvious reply is "this should be part of those
> subsystems rather than something done one-off like this (especially as it has a chance
> of breaking legitimate setups that use the current scheme).

The notional attacker has CAP_NET_ADMIN, perhaps through a vulnerable
service or a vulnerable set-capability executable.  They do not yet have
full root access and so cannot install a module, even in the absence of
an LSM.

So long as the attacker is able to load arbitrary modules, however, they
could exploit a vulnerability in any installed (not loaded) module.
Again, LSMs are irrelevant to this as they do not protect against kernel
bugs.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [PATCH] don't allow CAP_NET_ADMIN to load non-netdev kernel modules
From: Vasiliy Kulikov @ 2011-02-25 17:47 UTC (permalink / raw)
  To: Valdis.Kletnieks
  Cc: David S. Miller, netdev, linux-kernel, Alexey Kuznetsov,
	Pekka Savola (ipv6), James Morris, Hideaki YOSHIFUJI,
	Patrick McHardy, Eric Dumazet, Tom Herbert, Changli Gao,
	Jesse Gross
In-Reply-To: <135187.1298654740@localhost>

On Fri, Feb 25, 2011 at 12:25 -0500, Valdis.Kletnieks@vt.edu wrote:
> And you stop an attacker from simply recompiling the module with a suitable
> MODULE_ALIAS line added, how, exactly?  This patch may make sense down the
> road, but not while it's still trivial for a malicious root user to drop stuff
> into /lib/modules.

The threat is not a malicious root, but non-root with CAP_NET_ADMIN.
It's hardly possible to load an arbitrary module into the kernel having
CAP_NET_ADMIN without other capabilities.

> And if you're going the route "but SELinux/SMACK/Tomoyo will prevent a malicious
> root user from doing that", then the obvious reply is "this should be part of those
> subsystems rather than something done one-off like this (especially as it has a chance
> of breaking legitimate setups that use the current scheme).

No, I don't want to add anything about LSMs at all.


Thanks,

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply

* Re: [RFC] be2net: add rxhash support
From: Ajit Khaparde @ 2011-02-25 17:44 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev

> -----Original Message-----
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Sent: Thursday, February 24, 2011 2:25 PM
> To: Khaparde, Ajit
> Cc: netdev@vger.kernel.org
> Subject: [RFC] be2net: add rxhash support
> 
> Ajit, it seems be2net provides RSS hash value in rx compl descriptor ?
> 
> Could we feed skb->rxhash with it ?
> 
> Thanks !
Thanks Eric. Sure.
This is a long pending change which fell through the cracks.
But then because hashing is enabled in the device only when
Number of Rx Queues is > 1, I would suggest the following patch.

Unaware of exact conventions, I have added signed-off-by to the patch already.

Thanks

-----
[PATCH net-next] be2net: add rxhash support

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@emulex.com>
---
 drivers/net/benet/be_main.c |   11 +++++++++++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 26f9c56..8c4b782 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -1038,6 +1038,10 @@ static void be_rx_compl_process(struct be_adapter *adapter,
 
 	skb->truesize = skb->len + sizeof(struct sk_buff);
 	skb->protocol = eth_type_trans(skb, adapter->netdev);
+	if (adapter->netdev->features & NETIF_F_RXHASH)
+		skb->rxhash = AMAP_GET_BITS(struct amap_eth_rx_compl,
+					rsshash, rxcp);
+
 
 	vlanf = AMAP_GET_BITS(struct amap_eth_rx_compl, vtp, rxcp);
 	vtm = AMAP_GET_BITS(struct amap_eth_rx_compl, vtm, rxcp);
@@ -1099,6 +1103,10 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter,
 		return;
 	}
 
+	if (adapter->netdev->features & NETIF_F_RXHASH)
+		skb->rxhash = AMAP_GET_BITS(struct amap_eth_rx_compl,
+						rsshash, rxcp);
+
 	remaining = pkt_size;
 	for (i = 0, j = -1; i < num_rcvd; i++) {
 		page_info = get_rx_page_info(adapter, rxo, rxq_idx);
@@ -2619,6 +2627,9 @@ static void be_netdev_init(struct net_device *netdev)
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_GRO | NETIF_F_TSO6;
 
+	if (be_multi_rxq(adapter))
+		netdev->features |= NETIF_F_RXHASH;
+
 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO |
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH] don't allow CAP_NET_ADMIN to load non-netdev kernel modules
From: Valdis.Kletnieks @ 2011-02-25 17:25 UTC (permalink / raw)
  To: Vasiliy Kulikov
  Cc: David S. Miller, netdev, linux-kernel, Alexey Kuznetsov,
	Pekka Savola (ipv6), James Morris, Hideaki YOSHIFUJI,
	Patrick McHardy, Eric Dumazet, Tom Herbert, Changli Gao,
	Jesse Gross
In-Reply-To: <20110225151414.GA5211@albatros>

[-- Attachment #1: Type: text/plain, Size: 1145 bytes --]

On Fri, 25 Feb 2011 18:14:14 +0300, Vasiliy Kulikov said:
> Since a8f80e8ff94ecba629542d9b4b5f5a8ee3eb565c any process with
> CAP_NET_ADMIN may load any module from /lib/modules/.  This doesn't mean
> that CAP_NET_ADMIN is a superset of CAP_SYS_MODULE as modules are limited
> to /lib/modules/**.  However, CAP_NET_ADMIN capability shouldn't allow
> anybody to load any module not related to networking.
> 
> This patch restricts an ability of autoloading modules to netdev modules
> with explicit aliases.  Currently there are only three users of the
> feature: ipip, ip_gre and sit.

And you stop an attacker from simply recompiling the module with a suitable
MODULE_ALIAS line added, how, exactly?  This patch may make sense down the
road, but not while it's still trivial for a malicious root user to drop stuff
into /lib/modules.

And if you're going the route "but SELinux/SMACK/Tomoyo will prevent a malicious
root user from doing that", then the obvious reply is "this should be part of those
subsystems rather than something done one-off like this (especially as it has a chance
of breaking legitimate setups that use the current scheme)".

[-- Attachment #2: Type: application/pgp-signature, Size: 227 bytes --]

^ permalink raw reply

* Re: [PATCH] udp: avoid searching when no ports are available
From: Eric Dumazet @ 2011-02-25 16:55 UTC (permalink / raw)
  To: Daniel Baluta; +Cc: netdev, davem, Rohan Chitradurga
In-Reply-To: <AANLkTimodVFC8969md-f7iebS_9WvVahV9fPFFLtup73@mail.gmail.com>

Le vendredi 25 février 2011 à 18:45 +0200, Daniel Baluta a écrit :
> I guess now, the correct bitmap size is MAX_UDP_PORTS / (udptable->mask + 1)
> or  MAX_UDP_PORTS >> udptable->log,  right?
> 


Yes, but using bitmap_zero(bitmap, PORTS_PER_CHAIN) is faster.

It generates 4 machine instructions,
	movq   $0x0,(%r10)
	movq   $0x0,0x8(%r10)
	movq   $0x0,0x10(%r10)
	movq   $0x0,0x18(%r10)

while bitmap_zero(bitmap, some_non_constant_expression) is more
expensive (it calls an out of line function)


> We don't have special needs on a small machine. We just want bind
> calls to fail faster when all UDP ports are exhausted.
> 
> I will be back with tests on latest kernel.

Hmm, please always use the latest kernel before sending patches.

Thanks



^ permalink raw reply

* Re: [PATCH] udp: avoid searching when no ports are available
From: Daniel Baluta @ 2011-02-25 16:45 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, davem, Rohan Chitradurga
In-Reply-To: <1298635575.2659.65.camel@edumazet-laptop>

On Fri, Feb 25, 2011 at 2:06 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le vendredi 25 février 2011 à 13:35 +0200, Daniel Baluta a écrit :
>> udp_lib_get_port uses a bitmap to mark used ports.
>>
>> When no ports are available we spend a lot of time, searching
>> for a port while holding hslot lock. Avoid this by checking if
>> bitmap is full.
>>
>>
>> Signed-off-by: Rohan Chitradurga <rohan@ixiacom.com>
>> Signed-off-by: Daniel Baluta <dbaluta@ixiacom.com>
>> ---
>>  net/ipv4/udp.c |    6 ++++++
>>  1 files changed, 6 insertions(+), 0 deletions(-)
>>
>> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
>> index d37baaa..3e3592d 100644
>> --- a/net/ipv4/udp.c
>> +++ b/net/ipv4/udp.c
>> @@ -225,6 +225,12 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
>>                       udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
>>                                           saddr_comp, udptable->log);
>>
>> +                     /* avoid searching when no ports are available */
>> +                     if (bitmap_full(bitmap, PORTS_PER_CHAIN)) {
>> +                             spin_unlock_bh(&hslot->lock);
>> +                             break;
>> +                     }
>> +
>>                       snum = first;
>>                       /*
>>                        * Iterate on all possible values of snum for this hash.
>
> Really ? I wonder how you got your performance numbers then.
>
> First, PORTS_PER_CHAIN is wrong here, since its value is the max
> possible value (256 bits)

You are right. I have been working/testing on 2.6.32 where:

#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)

and I thought that the latest kernel has the same meaning
for PORTS_PER_CHAIN.

>
> #define UDP_HTABLE_SIZE_MIN              (CONFIG_BASE_SMALL ? 128 : 256)
> #define MAX_UDP_PORTS 65536
> #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)   -> 256
>
> As soon as your machine has enough memory (and most current machines
> do), the UDP hash table size is not 256, but 1024 or 2048
>
> dmesg | grep "UDP hash"
> [    1.735203] UDP hash table entries: 2048 (order: 6, 327680 bytes)
>
> So real bitmap size is 64 or 32 bits.
>
> Your call to bitmap_full() always returns false.

I guess now, the correct bitmap size is MAX_UDP_PORTS / (udptable->mask + 1)
or  MAX_UDP_PORTS >> udptable->log,  right?

> I don't like this patch. If you have special UDP needs on a small
> machine, just add to kernel boot param "uhash_entries=8192", so that the
> bitmap has 8 bits only.

We don't have special needs on a small machine. We just want bind
calls to fail faster when all UDP ports are exhausted.

I will be back with tests on latest kernel.

thanks,
Daniel.

^ permalink raw reply

* Occasional link flap on Intel 82599 on boot in XAUI mode
From: Brent Cook @ 2011-02-25 16:11 UTC (permalink / raw)
  To: netdev

We have a custom system with dual 82599's in XAUI mode. One has its pair of ports connected to a 10G switch, the other has its pair of ports connected to an FPGA.

Occasionally, on any of the interfaces, we will see the links flapping up and down when the system initially boots. This happens maybe one in 20 boots.

Feb 23 14:58:10 mfg kernel: [  594.254977] ixgbe: eth1 NIC Link is Down
Feb 23 14:58:12 mfg kernel: [  596.230039] ixgbe: eth1 NIC Link is Up 10 Gbps, Flow Control: RX/TX
Feb 23 14:58:12 mfg kernel: [  596.256537] ixgbe: eth1 NIC Link is Down
Feb 23 14:58:16 mfg kernel: [  600.228096] ixgbe: eth1 NIC Link is Up 10 Gbps, Flow Control: RX/TX
Feb 23 14:58:16 mfg kernel: [  600.240135] ixgbe: eth1 NIC Link is Down
Feb 23 14:58:18 mfg kernel: [  602.227047] ixgbe: eth1 NIC Link is Up 10 Gbps, Flow Control: RX/TX

Simply forcing a down/up on the interface seems to correct the problem:

ip link set eth1 down
ip link set eth1 up

Is anyone else using XAUI mode and has seen this? Here is the kernel information that we are using currently:

Linux sprint.labnet.local 2.6.32.28-bps #1 SMP PREEMPT Mon Jan 31 16:05:50 CST 2011 x86_64 GNU/Linux

Feb 23 14:48:24 mfg kernel: [    3.488664] Intel(R) PRO/1000 Network Driver - version 7.3.21-k5-NAPI
Feb 23 14:48:24 mfg kernel: [    3.495082] Copyright (c) 1999-2006 Intel Corporation.
Feb 23 14:48:24 mfg kernel: [    3.500238] e1000e: Intel(R) PRO/1000 Network Driver - 1.0.2-k2
Feb 23 14:48:24 mfg kernel: [    3.506138] e1000e: Copyright (c) 1999-2008 Intel Corporation.
Feb 23 14:48:24 mfg kernel: [    3.511988] Intel(R) Gigabit Ethernet Network Driver - version 1.3.16-k2
Feb 23 14:48:24 mfg kernel: [    3.518666] Copyright (c) 2007-2009 Intel Corporation.
Feb 23 14:48:24 mfg kernel: [    3.523817] ixgbe: Intel(R) 10 Gigabit PCI Express Network Driver - version 2.0.44-k2
Feb 23 14:48:24 mfg kernel: [    3.531622] ixgbe: Copyright (c) 1999-2009 Intel Corporation.
Feb 23 14:48:24 mfg kernel: [    3.537365] ixgbe 0000:01:00.0: PCI INT A -> GSI 24 (level, low) -> IRQ 24
Feb 23 14:48:24 mfg kernel: [    3.632934] ixgbe: 0000:01:00.0: ixgbe_init_interrupt_scheme: Multiqueue Enabled: Rx Queue count = 8, Tx Queue count = 8
Feb 23 14:48:24 mfg kernel: [    3.643769] ixgbe 0000:01:00.0: (PCI Express:5.0Gb/s:Width x8) 00:12:34:56:78:67
Feb 23 14:48:24 mfg kernel: [    3.651221] ixgbe 0000:01:00.0: MAC: 2, PHY: 0, PBA No: ffffff-0ff
Feb 23 14:48:24 mfg kernel: [    3.669390] ixgbe 0000:01:00.0: Intel(R) 10 Gigabit Network Connection
Feb 23 14:48:24 mfg kernel: [    3.675907] ixgbe 0000:01:00.1: PCI INT B -> GSI 47 (level, low) -> IRQ 47
Feb 23 14:48:24 mfg kernel: [    3.771863] ixgbe: 0000:01:00.1: ixgbe_init_interrupt_scheme: Multiqueue Enabled: Rx Queue count = 8, Tx Queue count = 8
Feb 23 14:48:24 mfg kernel: [    3.782699] ixgbe 0000:01:00.1: (PCI Express:5.0Gb/s:Width x8) 00:12:34:56:78:64
Feb 23 14:48:24 mfg kernel: [    3.790151] ixgbe 0000:01:00.1: MAC: 2, PHY: 0, PBA No: ffffff-0ff
Feb 23 14:48:24 mfg kernel: [    3.808314] ixgbe 0000:01:00.1: Intel(R) 10 Gigabit Network Connection
Feb 23 14:48:24 mfg kernel: [    3.814831] ixgbe 0000:02:00.0: PCI INT A -> GSI 35 (level, low) -> IRQ 35
Feb 23 14:48:24 mfg kernel: [    3.910802] ixgbe: 0000:02:00.0: ixgbe_init_interrupt_scheme: Multiqueue Enabled: Rx Queue count = 8, Tx Queue count = 8
Feb 23 14:48:24 mfg kernel: [    3.921636] ixgbe 0000:02:00.0: (PCI Express:5.0Gb/s:Width x8) 00:12:34:56:78:52
Feb 23 14:48:24 mfg kernel: [    3.929087] ixgbe 0000:02:00.0: MAC: 2, PHY: 0, PBA No: ffffff-0ff
Feb 23 14:48:24 mfg kernel: [    3.947246] ixgbe 0000:02:00.0: Intel(R) 10 Gigabit Network Connection
Feb 23 14:48:24 mfg kernel: [    3.953761] ixgbe 0000:02:00.1: PCI INT B -> GSI 36 (level, low) -> IRQ 36
Feb 23 14:48:24 mfg kernel: [    4.049731] ixgbe: 0000:02:00.1: ixgbe_init_interrupt_scheme: Multiqueue Enabled: Rx Queue count = 8, Tx Queue count = 8
Feb 23 14:48:24 mfg kernel: [    4.060566] ixgbe 0000:02:00.1: (PCI Express:5.0Gb/s:Width x8) 00:12:34:56:78:53
Feb 23 14:48:24 mfg kernel: [    4.068018] ixgbe 0000:02:00.1: MAC: 2, PHY: 0, PBA No: ffffff-0ff
Feb 23 14:48:24 mfg kernel: [    4.086179] ixgbe 0000:02:00.1: Intel(R) 10 Gigabit Network Connection

^ permalink raw reply

* Re: module loading with CAP_NET_ADMIN
From: Vasiliy Kulikov @ 2011-02-25 15:57 UTC (permalink / raw)
  To: Michael Tokarev
  Cc: Ben Hutchings, netdev, linux-kernel, Kees Cook, Eugene Teo,
	Dan Rosenberg, David S. Miller
In-Reply-To: <4D67CAD7.7060408@msgid.tls.msk.ru>

On Fri, Feb 25, 2011 at 18:29 +0300, Michael Tokarev wrote:
> 25.02.2011 15:30, Vasiliy Kulikov wrote:
> > On Thu, Feb 24, 2011 at 16:34 +0000, Ben Hutchings wrote:
> >> On Thu, 2011-02-24 at 18:12 +0300, Vasiliy Kulikov wrote:
> >>> My proposal is changing request_module("%s", name) to something like
> >>> request_module("netdev-%s", name) inside of dev_load() and adding
> >>> aliases to related drivers.
> 
> It is not the kernel patching which we should worry about, kernel
> part is trivial.
> 
> What is not trivial is to patch all the systems out there that
> autoload network drivers based on /etc/modprobe.d/network-aliases.conf
> (some local file), i.e., numerous working setups which have
> used this mechanism since the stone age.  And patching these is
> not trivial at all, unfortunately.
> 
> Somewhat weird setups (one can load the modules explicitly, and
> nowadays this all is handled by udev anyway), but this change
> will break some working systems.
> 
> Maybe the cost (some pain for some users) isn't large enough
> but the outcome is good, and I think it _is_ good, but it needs
> some wider discussion first, imho.
> 
> I can't think of a way to handle this without breaking stuff.

Currently Linux is slowly moving in the direction of rootless systems.  This
definitely needs proper restrictions of CAP_* power.  A network admin has
nothing to do with general modules.  It _has_ to break something one day
because old assumptions about permission stuff don't conform to CAP_*
things: old assumptions are very closely connected with just everything.

I'm not sure how this particular CAP_NET_ADMIN misuse should be fixed,
maybe distributions should supply a script to upgrade modprobe configs.
Also note that the change s/CAP_SYS_MODULE/CAP_NET_ADMIN/ was made in
2.6.32, so there is a possibility that the set of affected distributions
(those that don't use udev stuff) is very small.


Thanks for your input,

-- 
Vasiliy Kulikov
http://www.openwall.com - bringing security into open computing environments

^ permalink raw reply

* [PATCH 4/7] ipv6: hash is calculated but not used afterwards
From: Hagen Paul Pfeifer @ 2011-02-25 15:45 UTC (permalink / raw)
  To: netdev; +Cc: fw
In-Reply-To: <1298648721-3026-1-git-send-email-hagen@jauu.net>

hash is declared and assigned but not used anymore. ipv6_addr_hash()
exhibits no side effects.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
---
 net/ipv6/addrconf.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fd6782e..3daaf3c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -718,12 +718,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	struct inet6_ifaddr *ifa, *ifn;
 	struct inet6_dev *idev = ifp->idev;
 	int state;
-	int hash;
 	int deleted = 0, onlink = 0;
 	unsigned long expires = jiffies;
 
-	hash = ipv6_addr_hash(&ifp->addr);
-
 	spin_lock_bh(&ifp->state_lock);
 	state = ifp->state;
 	ifp->state = INET6_IFADDR_STATE_DEAD;
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 7/7] sched: protocol only needed when CONFIG_NET_CLS_ACT is enabled
From: Hagen Paul Pfeifer @ 2011-02-25 15:45 UTC (permalink / raw)
  To: netdev; +Cc: fw
In-Reply-To: <1298648721-3026-1-git-send-email-hagen@jauu.net>

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
---
 net/sched/sch_api.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1507415..7490f3f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1672,12 +1672,12 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
 		struct tcf_result *res)
 {
 	int err = 0;
-	__be16 protocol;
 #ifdef CONFIG_NET_CLS_ACT
+	__be16 protocol;
 	struct tcf_proto *otp = tp;
 reclassify:
-#endif
 	protocol = skb->protocol;
+#endif
 
 	err = tc_classify_compat(skb, tp, res);
 #ifdef CONFIG_NET_CLS_ACT
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 3/7] ipv6: totlen is declared and assigned but not used
From: Hagen Paul Pfeifer @ 2011-02-25 15:45 UTC (permalink / raw)
  To: netdev; +Cc: fw
In-Reply-To: <1298648721-3026-1-git-send-email-hagen@jauu.net>

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
---
 net/ipv6/ip6_output.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2600e22..25a2647 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -274,13 +274,10 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6hdr *hdr;
-	int totlen;
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
 
-	totlen = len + sizeof(struct ipv6hdr);
-
 	skb_reset_network_header(skb);
 	skb_put(skb, sizeof(struct ipv6hdr));
 	hdr = ipv6_hdr(skb);
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 6/7] ipv6: ignore rtnl_unicast() return code
From: Hagen Paul Pfeifer @ 2011-02-25 15:45 UTC (permalink / raw)
  To: netdev; +Cc: fw
In-Reply-To: <1298648721-3026-1-git-send-email-hagen@jauu.net>

rtnl_unicast() return value is not of interest, we can silently ignore
it, save some instructions and four bytes on the stack.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
---
 net/ipv6/ip6mr.c |    3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0e1d53b..618f67cc 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1039,7 +1039,6 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 
 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 		if (ipv6_hdr(skb)->version == 0) {
-			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 
 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -1050,7 +1049,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 				skb_trim(skb, nlh->nlmsg_len);
 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 			}
-			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
 			ip6_mr_forward(net, mrt, skb, c);
 	}
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 2/7] dccp: newdp is declared/assigned but never be used
From: Hagen Paul Pfeifer @ 2011-02-25 15:45 UTC (permalink / raw)
  To: netdev; +Cc: fw
In-Reply-To: <1298648721-3026-1-git-send-email-hagen@jauu.net>

Declaration and assignment of newdp is removed. Usage of dccp_sk()
exhibits no side effects.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
---
 net/dccp/ipv6.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index dca711d..460d545 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -484,7 +484,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	struct inet6_request_sock *ireq6 = inet6_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct inet_sock *newinet;
-	struct dccp_sock *newdp;
 	struct dccp6_sock *newdp6;
 	struct sock *newsk;
 	struct ipv6_txoptions *opt;
@@ -498,7 +497,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 			return NULL;
 
 		newdp6 = (struct dccp6_sock *)newsk;
-		newdp = dccp_sk(newsk);
 		newinet = inet_sk(newsk);
 		newinet->pinet6 = &newdp6->inet6;
 		newnp = inet6_sk(newsk);
@@ -578,7 +576,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	newdp6 = (struct dccp6_sock *)newsk;
 	newinet = inet_sk(newsk);
 	newinet->pinet6 = &newdp6->inet6;
-	newdp = dccp_sk(newsk);
 	newnp = inet6_sk(newsk);
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 5/7] ipv6: variable next is never used in this function
From: Hagen Paul Pfeifer @ 2011-02-25 15:45 UTC (permalink / raw)
  To: netdev; +Cc: fw
In-Reply-To: <1298648721-3026-1-git-send-email-hagen@jauu.net>

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
---
 net/ipv6/route.c |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f786aed..7e9443f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1075,11 +1075,9 @@ out:
 
 int icmp6_dst_gc(void)
 {
-	struct dst_entry *dst, *next, **pprev;
+	struct dst_entry *dst, **pprev;
 	int more = 0;
 
-	next = NULL;
-
 	spin_lock_bh(&icmp6_dst_lock);
 	pprev = &icmp6_dst_gc_list;
 
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 1/7] net: handle addr_type of 0 properly
From: Hagen Paul Pfeifer @ 2011-02-25 15:45 UTC (permalink / raw)
  To: netdev; +Cc: fw

addr_type of 0 means that the type should be adopted from from_dev and
not from __hw_addr_del_multiple(). Unfortunately this is not the case:
addr_type is always passed on, even when it is 0. Fix this by implementing
the intended and documented behavior.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
---
 net/core/dev_addr_lists.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 508f9c1..133fd22 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -144,7 +144,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
 
 	list_for_each_entry(ha, &from_list->list, list) {
 		type = addr_type ? addr_type : ha->type;
-		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
+		__hw_addr_del(to_list, ha->addr, addr_len, type);
 	}
 }
 EXPORT_SYMBOL(__hw_addr_del_multiple);
-- 
1.7.2.3


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox