Netdev List
 help / color / mirror / Atom feed
* [PATCH 6/7] net: Allow setting the network namespace by fd
From: Eric W. Biederman @ 2011-05-07  2:25 UTC (permalink / raw)
  To: linux-arch-u79uwXL29TY76Z2rM5mHXA
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	Eric W. Biederman, linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1304735101-1824-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Take advantage of the new abstraction and allow network devices
to be placed in any network namespace for which we have a file
descriptor.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/if_link.h     |    1 +
 include/net/net_namespace.h |    1 +
 net/core/net_namespace.c    |   33 +++++++++++++++++++++++++++++++--
 net/core/rtnetlink.c        |    5 ++++-
 4 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index f4a2e6b..0ee969a 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -136,6 +136,7 @@ enum {
 	IFLA_PORT_SELF,
 	IFLA_AF_SPEC,
 	IFLA_GROUP,		/* Group the device belongs to */
+	IFLA_NET_NS_FD,
 	__IFLA_MAX
 };
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3ae4919..dcc8f57 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -119,6 +119,7 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
 extern struct list_head net_namespace_list;
 
 extern struct net *get_net_ns_by_pid(pid_t pid);
+extern struct net *get_net_ns_by_fd(int pid);
 
 #ifdef CONFIG_NET_NS
 extern void __put_net(struct net *net);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bf7707e..b7403ff 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -8,6 +8,8 @@
 #include <linux/idr.h>
 #include <linux/rculist.h>
 #include <linux/nsproxy.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
@@ -343,6 +345,28 @@ struct net *get_net_ns_by_pid(pid_t pid)
 }
 EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 
+struct net *get_net_ns_by_fd(int fd)
+{
+	struct proc_inode *ei;
+	struct file *file;
+	struct net *net;
+
+	net = ERR_PTR(-EINVAL);
+	file = proc_ns_fget(fd);
+	if (!file)
+		goto out;
+
+	ei = PROC_I(file->f_dentry->d_inode);
+	if (ei->ns_ops != &netns_operations)
+		goto out;
+
+	net = get_net(ei->ns);
+out:
+	if (file)
+		fput(file);
+	return net;
+}
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -577,10 +601,15 @@ EXPORT_SYMBOL_GPL(unregister_pernet_device);
 #ifdef CONFIG_NET_NS
 static void *netns_get(struct task_struct *task)
 {
-	struct net *net;
+	struct net *net = NULL;
+	struct nsproxy *nsproxy;
+
 	rcu_read_lock();
-	net = get_net(task->nsproxy->net_ns);
+	nsproxy = task_nsproxy(task);
+	if (nsproxy)
+		net = get_net(nsproxy->net_ns);
 	rcu_read_unlock();
+
 	return net;
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4..dca9602 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1043,6 +1043,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKMODE]		= { .type = NLA_U8 },
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
+	[IFLA_NET_NS_FD]	= { .type = NLA_U32 },
 	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
 	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
@@ -1091,6 +1092,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 	 */
 	if (tb[IFLA_NET_NS_PID])
 		net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+	else if (tb[IFLA_NET_NS_FD])
+		net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
 	else
 		net = get_net(src_net);
 	return net;
@@ -1221,7 +1224,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	int send_addr_notify = 0;
 	int err;
 
-	if (tb[IFLA_NET_NS_PID]) {
+	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
 		struct net *net = rtnl_link_get_net(dev_net(dev), tb);
 		if (IS_ERR(net)) {
 			err = PTR_ERR(net);
-- 
1.6.5.2.143.g8cc62

^ permalink raw reply related

* [PATCH 7/7] ns: Wire up the setns system call
From: Eric W. Biederman @ 2011-05-07  2:25 UTC (permalink / raw)
  To: linux-arch
  Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
	Linux Containers, Renato Westphal, Eric W. Biederman
In-Reply-To: <1304735101-1824-1-git-send-email-ebiederm@xmission.com>

v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com>
v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com>
v4: Moved wiring up of the system call to another patch
v5: ported to v2.6.39-rc6

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 arch/alpha/include/asm/unistd.h        |    3 ++-
 arch/alpha/kernel/systbls.S            |    1 +
 arch/arm/include/asm/unistd.h          |    1 +
 arch/arm/kernel/calls.S                |    1 +
 arch/avr32/include/asm/unistd.h        |    3 ++-
 arch/avr32/kernel/syscall_table.S      |    1 +
 arch/blackfin/include/asm/unistd.h     |    3 ++-
 arch/blackfin/mach-common/entry.S      |    1 +
 arch/cris/arch-v10/kernel/entry.S      |    1 +
 arch/cris/arch-v32/kernel/entry.S      |    1 +
 arch/cris/include/asm/unistd.h         |    3 ++-
 arch/frv/include/asm/unistd.h          |    3 ++-
 arch/frv/kernel/entry.S                |    1 +
 arch/h8300/include/asm/unistd.h        |    3 ++-
 arch/h8300/kernel/syscalls.S           |    1 +
 arch/ia64/include/asm/unistd.h         |    3 ++-
 arch/ia64/kernel/entry.S               |    1 +
 arch/m32r/include/asm/unistd.h         |    3 ++-
 arch/m32r/kernel/syscall_table.S       |    1 +
 arch/m68k/include/asm/unistd.h         |    3 ++-
 arch/m68k/kernel/syscalltable.S        |    1 +
 arch/microblaze/include/asm/unistd.h   |    3 ++-
 arch/microblaze/kernel/syscall_table.S |    1 +
 arch/mips/include/asm/unistd.h         |   15 +++++++++------
 arch/mips/kernel/scall32-o32.S         |    1 +
 arch/mips/kernel/scall64-64.S          |    1 +
 arch/mips/kernel/scall64-n32.S         |    1 +
 arch/mips/kernel/scall64-o32.S         |    1 +
 arch/mn10300/include/asm/unistd.h      |    3 ++-
 arch/mn10300/kernel/entry.S            |    1 +
 arch/parisc/include/asm/unistd.h       |    4 ++--
 arch/parisc/kernel/syscall_table.S     |    1 +
 arch/powerpc/include/asm/systbl.h      |    1 +
 arch/powerpc/include/asm/unistd.h      |    3 ++-
 arch/s390/include/asm/unistd.h         |    3 ++-
 arch/s390/kernel/syscalls.S            |    1 +
 arch/sh/include/asm/unistd_32.h        |    3 ++-
 arch/sh/include/asm/unistd_64.h        |    3 ++-
 arch/sh/kernel/syscalls_32.S           |    1 +
 arch/sh/kernel/syscalls_64.S           |    1 +
 arch/sparc/include/asm/unistd.h        |    3 ++-
 arch/sparc/kernel/systbls_32.S         |    2 +-
 arch/sparc/kernel/systbls_64.S         |    4 ++--
 arch/x86/ia32/ia32entry.S              |    1 +
 arch/x86/include/asm/unistd_32.h       |    3 ++-
 arch/x86/include/asm/unistd_64.h       |    2 ++
 arch/x86/kernel/syscall_table_32.S     |    1 +
 arch/xtensa/include/asm/unistd.h       |    4 +++-
 include/asm-generic/unistd.h           |    4 +++-
 49 files changed, 81 insertions(+), 30 deletions(-)

diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 058937b..664383d 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -452,10 +452,11 @@
 #define __NR_fanotify_init		494
 #define __NR_fanotify_mark		495
 #define __NR_prlimit64			496
+#define __NR_setns                      497
 
 #ifdef __KERNEL__
 
-#define NR_SYSCALLS			497
+#define NR_SYSCALLS			498
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index a6a1de9..4663fd5 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -515,6 +515,7 @@ sys_call_table:
 	.quad sys_fanotify_init
 	.quad sys_fanotify_mark				/* 495 */
 	.quad sys_prlimit64
+	.quad sys_setns
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 87dbe3e..41afe2e 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -400,6 +400,7 @@
 #define __NR_open_by_handle_at		(__NR_SYSCALL_BASE+371)
 #define __NR_clock_adjtime		(__NR_SYSCALL_BASE+372)
 #define __NR_syncfs			(__NR_SYSCALL_BASE+373)
+#define __NR_setns                      (__NR_SYSCALL_BASE+374)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 7fbf28c..a05f759 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -383,6 +383,7 @@
 		CALL(sys_open_by_handle_at)
 		CALL(sys_clock_adjtime)
 		CALL(sys_syncfs)
+		CALL(sys_setns)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h
index 89861a2..800ea91 100644
--- a/arch/avr32/include/asm/unistd.h
+++ b/arch/avr32/include/asm/unistd.h
@@ -299,9 +299,10 @@
 #define __NR_signalfd		279
 /* 280 was __NR_timerfd */
 #define __NR_eventfd		281
+#define __NR_setns              282
 
 #ifdef __KERNEL__
-#define NR_syscalls		282
+#define NR_syscalls		283
 
 /* Old stuff */
 #define __IGNORE_uselib
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index e76bad1..c7fd394 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -296,4 +296,5 @@ sys_call_table:
 	.long	sys_ni_syscall		/* 280, was sys_timerfd */
 	.long	sys_eventfd
 	.long	sys_recvmmsg
+	.long	sys_setns
 	.long	sys_ni_syscall		/* r8 is saturated at nr_syscalls */
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index ff9a9f3..b638e6b 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -397,8 +397,9 @@
 #define __NR_open_by_handle_at	376
 #define __NR_clock_adjtime	377
 #define __NR_syncfs		378
+#define __NR_setns              379
 
-#define __NR_syscall		379
+#define __NR_syscall		380
 #define NR_syscalls		__NR_syscall
 
 /* Old optional stuff no one actually uses */
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index f96933f..dda11ef 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1753,6 +1753,7 @@ ENTRY(_sys_call_table)
 	.long _sys_open_by_handle_at
 	.long _sys_clock_adjtime
 	.long _sys_syncfs
+	.long _sys_setns
 
 	.rept NR_syscalls-(.-_sys_call_table)/4
 	.long _sys_ni_syscall
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 0d6420d..1161883 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -937,6 +937,7 @@ sys_call_table:
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_setns			/* 335 */
 
         /*
          * NOTE!! This doesn't have to be exact - we just have
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 3abf12c..84fed7e 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -880,6 +880,7 @@ sys_call_table:
 	.long sys_inotify_init1
 	.long sys_preadv
 	.long sys_pwritev
+	.long sys_setns			/* 335 */
 
         /*
          * NOTE!! This doesn't have to be exact - we just have
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index f6fad83..c59a107 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -339,10 +339,11 @@
 #define __NR_inotify_init1	332
 #define __NR_preadv		333
 #define __NR_pwritev		334
+#define __NR_setns              335
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 335
+#define NR_syscalls 336
 
 #include <arch/unistd.h>
 
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index b28da49..6470f11 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -343,10 +343,11 @@
 #define __NR_pwritev		334
 #define __NR_rt_tgsigqueueinfo	335
 #define __NR_perf_event_open	336
+#define __NR_setns              337
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 337
+#define NR_syscalls 338
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 /* #define __ARCH_WANT_OLD_READDIR */
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 63d579b..017d6d7 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1526,5 +1526,6 @@ sys_call_table:
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo	/* 335 */
 	.long sys_perf_event_open
+	.long sys_setns
 
 syscall_table_size = (. - sys_call_table)
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index 50f2c5a..85c9552 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -325,10 +325,11 @@
 #define __NR_move_pages		317
 #define __NR_getcpu		318
 #define __NR_epoll_pwait	319
+#define __NR_setns              320
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 320
+#define NR_syscalls 321
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index faefaff..f4b2e67 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -333,6 +333,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_move_pages */
 	.long SYMBOL_NAME(sys_getcpu)
 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_epoll_pwait */
+	.long SYMBOL_NAME(sys_setns)		/* 320 */
 
 	.macro	call_sp addr
 	mov.l	#SYMBOL_NAME(\addr),er6
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 404d037..2fbda93 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -319,11 +319,12 @@
 #define __NR_open_by_handle_at  	1327
 #define __NR_clock_adjtime		1328
 #define __NR_syncfs			1329
+#define __NR_setns                      1330
 
 #ifdef __KERNEL__
 
 
-#define NR_syscalls			306 /* length of syscall table */
+#define NR_syscalls			307 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6de2e23..9ca8019 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1775,6 +1775,7 @@ sys_call_table:
 	data8 sys_open_by_handle_at
 	data8 sys_clock_adjtime
 	data8 sys_syncfs
+	data8 sys_setns				// 1330
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index c705456..3c7dafc 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h
@@ -330,10 +330,11 @@
 /* #define __NR_timerfd		322 removed */
 #define __NR_eventfd		323
 #define __NR_fallocate		324
+#define __NR_setns              325
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 325
+#define NR_syscalls 326
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_STAT64
diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S
index 60536e2..528f2e6 100644
--- a/arch/m32r/kernel/syscall_table.S
+++ b/arch/m32r/kernel/syscall_table.S
@@ -324,3 +324,4 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall
 	.long sys_eventfd
 	.long sys_fallocate
+	.long sys_setns			/* 325 */
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 29e1790..c11a093 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -347,10 +347,11 @@
 #define __NR_open_by_handle_at	341
 #define __NR_clock_adjtime	342
 #define __NR_syncfs		343
+#define __NR_setns              344
 
 #ifdef __KERNEL__
 
-#define NR_syscalls		344
+#define NR_syscalls		345
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 9b8393d..b5a7bbf 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -362,6 +362,7 @@ ENTRY(sys_call_table)
 	.long sys_open_by_handle_at
 	.long sys_clock_adjtime
 	.long sys_syncfs
+	.long sys_setns
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 30edd61..dcebe59 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -390,8 +390,9 @@
 #define __NR_open_by_handle_at	372
 #define __NR_clock_adjtime	373
 #define __NR_syncfs		374
+#define __NR_setns              375
 
-#define __NR_syscalls		375
+#define __NR_syscalls		376
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 85cea81..d915a12 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -379,3 +379,4 @@ ENTRY(sys_call_table)
 	.long sys_open_by_handle_at
 	.long sys_clock_adjtime
 	.long sys_syncfs
+	.long sys_setns			/* 375 */
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index fa2e37e..d0d84eb 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -363,16 +363,17 @@
 #define __NR_open_by_handle_at		(__NR_Linux + 340)
 #define __NR_clock_adjtime		(__NR_Linux + 341)
 #define __NR_syncfs			(__NR_Linux + 342)
+#define __NR_setns                      (__NR_Linux + 343)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		342
+#define __NR_Linux_syscalls		343
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		342
+#define __NR_O32_Linux_syscalls		343
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -682,16 +683,17 @@
 #define __NR_open_by_handle_at		(__NR_Linux + 299)
 #define __NR_clock_adjtime		(__NR_Linux + 300)
 #define __NR_syncfs			(__NR_Linux + 301)
+#define __NR_setns                      (__NR_Linux + 302)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		301
+#define __NR_Linux_syscalls		302
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		301
+#define __NR_64_Linux_syscalls		302
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1006,16 +1008,17 @@
 #define __NR_open_by_handle_at		(__NR_Linux + 304)
 #define __NR_clock_adjtime		(__NR_Linux + 305)
 #define __NR_syncfs			(__NR_Linux + 306)
+#define __NR_setns                      (__NR_Linux + 307)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		306
+#define __NR_Linux_syscalls		307
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		306
+#define __NR_N32_Linux_syscalls		307
 
 #ifdef __KERNEL__
 
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7f5468b..c6a2b58 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -590,6 +590,7 @@ einval:	li	v0, -ENOSYS
 	sys	sys_open_by_handle_at	3	/* 4340 */
 	sys	sys_clock_adjtime	2
 	sys	sys_syncfs		1
+	sys	sys_setns		2
 	.endm
 
 	/* We pre-compute the number of _instruction_ bytes needed to
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index a2e1fcb..57e4184 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -429,4 +429,5 @@ sys_call_table:
 	PTR	sys_open_by_handle_at
 	PTR	sys_clock_adjtime		/* 5300 */
 	PTR	sys_syncfs
+	PTR	sys_setns
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index b2c7624..91ae887 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -429,4 +429,5 @@ EXPORT(sysn32_call_table)
 	PTR	sys_open_by_handle_at
 	PTR	compat_sys_clock_adjtime	/* 6305 */
 	PTR	sys_syncfs
+	PTR	sys_setns
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 049a9c8..02e6ae5 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -547,4 +547,5 @@ sys_call_table:
 	PTR	compat_sys_open_by_handle_at	/* 4340 */
 	PTR	compat_sys_clock_adjtime
 	PTR	sys_syncfs
+	PTR	sys_setns
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index 9d056f5..faf57bb 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -349,10 +349,11 @@
 #define __NR_rt_tgsigqueueinfo	336
 #define __NR_perf_event_open	337
 #define __NR_recvmmsg		338
+#define __NR_setns              339
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 339
+#define NR_syscalls 340
 
 /*
  * specify the deprecated syscalls we want to support on this arch
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index fb93ad7..ae435e1 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -759,6 +759,7 @@ ENTRY(sys_call_table)
 	.long sys_rt_tgsigqueueinfo
 	.long sys_perf_event_open
 	.long sys_recvmmsg
+	.long sys_setns
 
 
 nr_syscalls=(.-sys_call_table)/4
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3eb82c2..bbaed57 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -814,9 +814,9 @@
 #define __NR_recvmmsg		(__NR_Linux + 319)
 #define __NR_accept4		(__NR_Linux + 320)
 #define __NR_prlimit64		(__NR_Linux + 321)
+#define __NR_setns              (__NR_Linux + 322)
 
-#define __NR_Linux_syscalls	(__NR_prlimit64 + 1)
-
+#define __NR_Linux_syscalls	(__NR_setns + 1)
 
 #define __IGNORE_select		/* newselect */
 #define __IGNORE_fadvise64	/* fadvise64_64 */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 4be85ee..151349c 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -420,6 +420,7 @@
 	ENTRY_COMP(recvmmsg)
 	ENTRY_SAME(accept4)		/* 320 */
 	ENTRY_SAME(prlimit64)
+	ENTRY_SAME(setns)
 
 	/* Nothing yet */
 
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 60f64b1..2a5f60c 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at)
 COMPAT_SYS_SPU(open_by_handle_at)
 COMPAT_SYS_SPU(clock_adjtime)
 SYSCALL_SPU(syncfs)
+SYSCALL_SPU(setns)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3c21564..2780f7a 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -371,10 +371,11 @@
 #define __NR_open_by_handle_at	346
 #define __NR_clock_adjtime	347
 #define __NR_syncfs		348
+#define __NR_setns              349
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		349
+#define __NR_syscalls		350
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index e821525..3399bed 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -276,7 +276,8 @@
 #define __NR_open_by_handle_at	336
 #define __NR_clock_adjtime	337
 #define __NR_syncfs		338
-#define NR_syscalls 339
+#define __NR_setns              339
+#define NR_syscalls 340
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9c65fd4..3ec04ba 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -347,3 +347,4 @@ SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrappe
 SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper)
 SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
 SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
+SYSCALL(sys_setns,sys_setns,sys_setns)
diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h
index ca7765e..26c697c 100644
--- a/arch/sh/include/asm/unistd_32.h
+++ b/arch/sh/include/asm/unistd_32.h
@@ -373,8 +373,9 @@
 #define __NR_open_by_handle_at	360
 #define __NR_clock_adjtime	361
 #define __NR_syncfs		362
+#define __NR_setns              363
 
-#define NR_syscalls 363
+#define NR_syscalls 364
 
 #ifdef __KERNEL__
 
diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h
index a694009..330668a 100644
--- a/arch/sh/include/asm/unistd_64.h
+++ b/arch/sh/include/asm/unistd_64.h
@@ -394,10 +394,11 @@
 #define __NR_open_by_handle_at	371
 #define __NR_clock_adjtime	372
 #define __NR_syncfs		373
+#define __NR_setns              374
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 374
+#define NR_syscalls 375
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 030966a..67dcf66 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -380,3 +380,4 @@ ENTRY(sys_call_table)
 	.long sys_open_by_handle_at	/* 360 */
 	.long sys_clock_adjtime
 	.long sys_syncfs
+	.long sys_setns
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index ca0a614..dade54e 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -400,3 +400,4 @@ sys_call_table:
 	.long sys_open_by_handle_at
 	.long sys_clock_adjtime
 	.long sys_syncfs
+	.long sys_setns
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 9d897b6..10b3105 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -404,8 +404,9 @@
 #define __NR_open_by_handle_at	333
 #define __NR_clock_adjtime	334
 #define __NR_syncfs		335
+#define __NR_setns              336
 
-#define NR_syscalls		336
+#define NR_syscalls		337
 
 #ifdef __32bit_syscall_numbers__
 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 47ac73c..88d0a93 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -84,4 +84,4 @@ sys_call_table:
 /*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
 /*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-/*335*/	.long sys_syncfs
+/*335*/	.long sys_syncfs, sys_setns
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 4f3170c..19cdb12 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,7 @@ sys_call_table32:
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
 	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
-	.word sys_syncfs
+	.word sys_syncfs, sys_setns
 
 #endif /* CONFIG_COMPAT */
 
@@ -162,4 +162,4 @@ sys_call_table:
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
 	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-	.word sys_syncfs
+	.word sys_syncfs, sys_setns
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 849a9d2..58bef77 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -848,4 +848,5 @@ ia32_sys_call_table:
 	.quad compat_sys_open_by_handle_at
 	.quad compat_sys_clock_adjtime
 	.quad sys_syncfs
+	.quad sys_setns				/* 345 */
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index a755ef5..be6c177 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -350,10 +350,11 @@
 #define __NR_open_by_handle_at  342
 #define __NR_clock_adjtime	343
 #define __NR_syncfs             344
+#define __NR_setns		345
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 345
+#define NR_syscalls 346
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 160fa76..c46f833 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
 __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
 #define __NR_syncfs                             306
 __SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_setns				307
+__SYSCALL(__NR_setns, sys_setns)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index abce34d..3d77b82 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -344,3 +344,4 @@ ENTRY(sys_call_table)
 	.long sys_open_by_handle_at
 	.long sys_clock_adjtime
 	.long sys_syncfs
+	.long sys_setns			/* 345 */
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index 528042c..a6f934f 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -683,8 +683,10 @@ __SYSCALL(305, sys_ni_syscall, 0)
 __SYSCALL(306, sys_eventfd, 1)
 #define __NR_recvmmsg				307
 __SYSCALL(307, sys_recvmmsg, 5)
+#define __NR_setns				308
+__SYSCALL(308, sys_setns, 2)
 
-#define __NR_syscall_count			308
+#define __NR_syscall_count			309
 
 /*
  * sysxtensa syscall handler
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 07c40d5..5fcd304 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -654,9 +654,11 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
 __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
 #define __NR_syncfs 267
 __SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_setns 268
+__SYSCALL(__NR_setns, sys_setns)
 
 #undef __NR_syscalls
-#define __NR_syscalls 268
+#define __NR_syscalls 269
 
 /*
  * All syscalls below here should go away really,
-- 
1.6.5.2.143.g8cc62

^ permalink raw reply related

* [PATCH 3/7] ns proc: Add support for the network namespace.
From: Eric W. Biederman @ 2011-05-07  2:24 UTC (permalink / raw)
  To: linux-arch
  Cc: linux-kernel, netdev, linux-fsdevel, jamal, Daniel Lezcano,
	Linux Containers, Renato Westphal, Eric W. Biederman
In-Reply-To: <1304735101-1824-1-git-send-email-ebiederm@xmission.com>

Implementing file descriptors for the network namespace
is simple and straightforward.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/namespaces.c     |    3 +++
 include/linux/proc_fs.h  |    1 +
 net/core/net_namespace.c |   31 +++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 6ae9f07..dcbd483 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -16,6 +16,9 @@
 
 
 static const struct proc_ns_operations *ns_entries[] = {
+#ifdef CONFIG_NET_NS
+	&netns_operations,
+#endif
 };
 
 static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index a6d2c6d..62126ec 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -265,6 +265,7 @@ struct proc_ns_operations {
 	void (*put)(void *ns);
 	int (*install)(struct nsproxy *nsproxy, void *ns);
 };
+extern const struct proc_ns_operations netns_operations;
 
 union proc_op {
 	int (*proc_get_link)(struct inode *, struct path *);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3f86026..bf7707e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops)
 	mutex_unlock(&net_mutex);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_device);
+
+#ifdef CONFIG_NET_NS
+static void *netns_get(struct task_struct *task)
+{
+	struct net *net;
+	rcu_read_lock();
+	net = get_net(task->nsproxy->net_ns);
+	rcu_read_unlock();
+	return net;
+}
+
+static void netns_put(void *ns)
+{
+	put_net(ns);
+}
+
+static int netns_install(struct nsproxy *nsproxy, void *ns)
+{
+	put_net(nsproxy->net_ns);
+	nsproxy->net_ns = get_net(ns);
+	return 0;
+}
+
+const struct proc_ns_operations netns_operations = {
+	.name		= "net",
+	.type		= CLONE_NEWNET,
+	.get		= netns_get,
+	.put		= netns_put,
+	.install	= netns_install,
+};
+#endif
-- 
1.6.5.2.143.g8cc62


^ permalink raw reply related

* Re: [PATCH 2/2] ipv4: Initialize on-stack cork more efficiently.
From: Eric Dumazet @ 2011-05-07  5:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, herbert
In-Reply-To: <1304721317.2821.156.camel@edumazet-laptop>

Le samedi 07 mai 2011 à 00:35 +0200, Eric Dumazet a écrit :
> Le vendredi 06 mai 2011 à 15:26 -0700, David Miller a écrit :
> > ip_setup_cork() explicitly initializes every member of
> > inet_cork except flags, addr, and opt.  So we can simply
> > set those three members to zero instead of using a
> > memset() via an empty struct assignment.
> > 
> > Signed-off-by: David S. Miller <davem@davemloft.net>
> > ---
> >  net/ipv4/ip_output.c |    5 ++++-
> >  1 files changed, 4 insertions(+), 1 deletions(-)
> > 
> > diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> > index eb0647a..5f5fe4f 100644
> > --- a/net/ipv4/ip_output.c
> > +++ b/net/ipv4/ip_output.c
> > @@ -1408,7 +1408,7 @@ struct sk_buff *ip_make_skb(struct sock *sk,
> >  			    struct ipcm_cookie *ipc, struct rtable **rtp,
> >  			    unsigned int flags)
> >  {
> > -	struct inet_cork cork = {};
> > +	struct inet_cork cork;
> >  	struct sk_buff_head queue;
> >  	int err;
> >  
> > @@ -1417,6 +1417,9 @@ struct sk_buff *ip_make_skb(struct sock *sk,
> >  
> >  	__skb_queue_head_init(&queue);
> >  
> > +	cork.flags = 0;
> > +	cork.addr = 0;
> > +	cork.opt = 0;
> >  	err = ip_setup_cork(sk, &cork, ipc, rtp);
> >  	if (err)
> >  		return ERR_PTR(err);
> 
> Very nice, thanks for finishing this stuff :)
> 
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
> 

By the way, when I spotted this "struct inet_cork cork = {};" to be
optimized, my idea was to add yet another case of fastpath to UDP send :

For small datagrams (most UDP uses : RTP, DNS...),
perform the user->kernel copy before route lookup, so that we can
perform an RCU route lookup. This would tremendously speedup UDP, since
the refcount handling is our last hot spot (not counting qdisc if
present)

   PerfTop:   16142 irqs/sec  kernel:97.5%  exact:  0.0% [1000Hz cycles],  (all, 16 CPUs)
-----------------------------------------------------------------------------------------------------------

             samples  pcnt function                 DSO
             _______ _____ ________________________ ______________________

            16735.00 24.2% __ip_route_output_key    vmlinux               
             9706.00 14.1% dst_release              vmlinux               
             6754.00  9.8% __ip_make_skb            vmlinux               
             5737.00  8.3% udp_send_skb             vmlinux               
             5384.00  7.8% ip_finish_output         vmlinux               
             3578.00  5.2% udp_sendmsg              vmlinux               
             1435.00  2.1% copy_user_generic_string vmlinux               
             1358.00  2.0% ia32_sysenter_target     vmlinux               
             1095.00  1.6% __ip_append_data         vmlinux               
              832.00  1.2% kfree                    vmlinux               
              794.00  1.2% __memset                 vmlinux               
              677.00  1.0% fget_light               vmlinux               
              641.00  0.9% sock_wfree               vmlinux               
              637.00  0.9% dev_queue_xmit           vmlinux               



^ permalink raw reply

* Re: [PATCH 2/2] ipv4: Initialize on-stack cork more efficiently.
From: David Miller @ 2011-05-07  5:21 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, herbert
In-Reply-To: <1304745104.2821.590.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 07 May 2011 07:11:44 +0200

> By the way, when I spotted this "struct inet_cork cork = {};" to be
> optimized, my idea was to add yet another case of fastpath to UDP send :
> 
> For small datagrams (most UDP uses : RTP, DNS...),
> perform the user->kernel copy before route lookup, so that we can
> perform an RCU route lookup. This would tremendously speedup UDP, since
> the refcount handling is our last hot spot (not counting qdisc if
> present)

Interesting idea.

This reminds me, remember about the input noref route lookup stuff
going away with the routing cache removal?  It turns out that when we
do my "routes embedded in fib nexthop" for input, the noref stuff can
be used. :)


^ permalink raw reply

* RE: [RFC v2] virtio: add virtio-over-PCI driver
From: Kushwaha Prabhakar-B32579 @ 2011-05-07  5:59 UTC (permalink / raw)
  To: Ira W. Snyder
  Cc: Zang Roy-R61911, Gala Kumar-B11780, Gupta Maneesh-B18878,
	Aggrwal Poonam-B10812, Kalra Ashish-B00888,
	linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	netdev@vger.kernel.org, linuxppc-dev@ozlabs.org
In-Reply-To: <20110506160627.GB14069@ovro.caltech.edu>

Thanks Ira for your kind reply.
I will look for the mentioned pointers :) 


Prabhakar 

> -----Original Message-----
> From: Ira W. Snyder [mailto:iws@ovro.caltech.edu]
> Sent: Friday, May 06, 2011 9:36 PM
> To: Kushwaha Prabhakar-B32579
> Cc: Zang Roy-R61911; Gala Kumar-B11780; Gupta Maneesh-B18878; Aggrwal
> Poonam-B10812; Kalra Ashish-B00888; linux-kernel@vger.kernel.org;
> linuxppc-dev@ozlabs.org; netdev@vger.kernel.org
> Subject: Re: [RFC v2] virtio: add virtio-over-PCI driver
> 
> On Fri, May 06, 2011 at 12:00:34PM +0000, Kushwaha Prabhakar-B32579
> wrote:
> > Hi,
> >
> > I want to use this patch as base patch for "FSL 85xx platform" to
> support PCIe Agent.
> > The work looks to be little old now. So wanted to understand if any
> development has happened further on it.
> >
> > In case no, I would take this work forward for PCIe Agent.
> >
> > Any help/suggestions are most appreciated in this regard.
> >
> 
> Hi Prabhakar,
> 
> I use PCI agent mode on an mpc8349emds board. All of the important setup
> is done very early in the boot process, by U-Boot. Search the U-Boot
> source for CONFIG_PCISLAVE. I hunch that the setup needed for 85xx boards
> are similar.
> 
> This virtio-over-PCI work is now very old. It was intended to provide a
> communication mechanism between a PCI Master and many PCI Agents
> (slaves).
> Dave Miller (networking maintainer) suggested to use virtio for this so
> that many different devices could be used. Such as:
> - network interface
> - serial port (for serial console)
> 
> I am aware of other ongoing work in this area. Specifically, some ARM
> developers are working on a virtio API using their message registers.
> This work is much newer, and will be a much better starting place for
> you.
> 
> Search the virtualization mailing list for:
> "[PATCH 00/02] virtio: Virtio platform driver"
> 
> Here is a link to some of their code:
> http://www.spinics.net/lists/linux-sh/msg07188.html
> 
> I am currently using a custom driver to provide a network device on my
> PCI agents. Searching the mailing list archives for "PCINet", you will
> find early versions of the driver. I am happy to provide you a current
> copy. It does not use virtio at all, and is unlikely to be accepted into
> mainline Linux.
> 
> I am happy to provide any of my code if you think it would help you get
> started. Specifically, the current version of "PCINet" show how to use
> the DMA controller in order to get good network performance. I am also
> happy to help port code to 83xx, as well as test on 83xx. Please ask any
> questions you may have.
> 
> I have people ask about this code about once every two months. There is
> plenty of interest in a mainline Linux solution to this problem. :) I
> will be moving to 85xx someday, and I hope there is an accepted mainline
> solution by then.
> 
> I hope it helps,
> Ira
> 
> > -----Original Message-----
> > From: linux-kernel-owner@vger.kernel.org
> > [mailto:linux-kernel-owner@vger.kernel.org] On Behalf Of Ira Snyder
> > Sent: Friday, 27 February, 2009 3:19 AM
> > To: Arnd Bergmann
> > Cc: linux-kernel@vger.kernel.org; Rusty Russell; Jan-Bernd Themann;
> > linuxppc-dev@ozlabs.org; netdev@vger.kernel.org
> > Subject: Re: [RFC v2] virtio: add virtio-over-PCI driver
> >
> > On Thu, Feb 26, 2009 at 09:37:14PM +0100, Arnd Bergmann wrote:
> > > On Thursday 26 February 2009, Ira Snyder wrote:
> > > > On Thu, Feb 26, 2009 at 05:15:27PM +0100, Arnd Bergmann wrote:
> > > >
> > > > I think so too. I was just getting something working, and thought
> > > > it would be better to have it "out there" rather than be working
> > > > on it forever. I'll try to break things up as I have time.
> > >
> > > Ok, perfect!
> > >
> > > > For the "libraries", would you suggest breaking things into
> > > > seperate code files, and using EXPORT_SYMBOL_GPL()? I'm not very
> > > > familiar with doing that, I've mostly been writing code within the
> > > > existing device driver frameworks. Or do I need export symbol at
> all? I'm not sure...
> > >
> > > You have both options. When you list each file as a separate module
> > > in the Makefile, you use EXPORT_SYMBOL_GPL to mark functions that
> > > get called by dependent modules, but this will work only in one way.
> > >
> > > You can also link multiple files together into one module, although
> > > it is less common to link a single source file into multiple modules.
> > >
> >
> > Ok. I'm more familiar with the EXPORT_SYMBOL_GPL interface, so I'll do
> that. If we decide it sucks later, we'll change it.
> >
> > > > I always thought you were supposed to use packed for data
> > > > structures that are external to the system. I purposely designed
> > > > the structures so they wouldn't need padding.
> > >
> > > That would only make sense for structures that are explicitly
> > > unaligned, like a register layout using
> > >
> > > struct my_registers {
> > > 	__le16 first;
> > > 	__le32 second __attribute__((packed));
> > > 	__le16 third;
> > > };
> > >
> > > Even here, I'd recommend listing the individual members as packed
> > > rather than the entire struct. Obviously if you layout the members
> > > in a sane way, you don't need either.
> > >
> >
> > Ok. I'll drop the __attribute__((packed)) and make sure there aren't
> problems. I don't suspect any, though.
> >
> > > > I mostly don't need it. In fact, the only place I'm using
> > > > registers not specific to the messaging unit is in the probe
> > > > routine, where I setup the 1GB window into host memory and setting
> > > > up access to the guest memory on the PCI bus.
> > >
> > > You could add the registers you need for this to the "reg" property
> > > of your device, to be mapped with of_iomap.
> > >
> > > If the registers for setting up this window don't logically fit into
> > > the same device as the one you already use, the cleanest solution
> > > would be to have another device just for this and then make a
> > > function call into that driver to set up the window.
> > >
> >
> > The registers are part of the board control registers. They don't fit
> at all in the message unit. Doing this in the bootloader seems like a
> logical place, but that would require any testers to flash a new U-Boot
> image into their mpc8349emds boards.
> >
> > The first set of access is used to set up a 1GB region in the memory
> map that accesses the host's memory. Any reads/writes to addresses
> 0x80000000-0xc0000000 actually hit the host's memory.
> >
> > The last access sets up PCI BAR1 to hit the memory from
> dma_alloc_coherent(). The bootloader already sets up the window as 16K,
> it just doesn't point it anywhere. Maybe this /should/ go into the
> bootloader. Like above, it would require testers to flash a new U-Boot
> image into their mpc8349emds boards.
> >
> > > > Now, I wouldn't need to access these registers at all if the
> > > > bootloader could handle it. I just don't know if it is possible to
> > > > have Linux not use some memory that the bootloader allocated,
> > > > other than with the mem=XXX trick, which I'm sure wouldn't be
> acceptable.
> > > > I've just used regular RAM so this is portable to my custom board
> > > > (mpc8349emds based) and a regular mpc8349emds. I didn't want to
> > > > change anything board specific.
> > > >
> > > > I would love to have the bootloader allocate (or reserve somewhere
> > > > in the memory map) 16K of RAM, and not be required to allocate it
> > > > with dma_alloc_coherent(). It would save me plenty of headaches.
> > >
> > > I believe you can do that through the "memory" devices in the device
> > > tree, by leaving out a small part of the description of main memory,
> > > at putting it into the "reg" property of your own device.
> > >
> >
> > I'll explore this option. I didn't even know you could do this.  Is a
> driver that requires the trick acceptable for mainline inclusion? Just
> like setting up the 16K PCI window, this is very platform specific.
> >
> > This limits the guest driver to systems which are able to change
> Linux's view of their memory somehow. Maybe this isn't a problem.
> >
> > > > Code complexity only. Also, it was easier to write 80-char lines
> > > > with something like:
> > > >
> > > > vop_get_desc(vq, idx, &desc);
> > > > if (desc.flags & VOP_DESC_F_NEXT) {
> > > > 	/* do something */
> > > > }
> > > >
> > > > Instead of:
> > > > if (le16_to_cpu(vq->desc[idx].flags) & VOP_DESC_F_NEXT) {
> > > > 	/* do something */
> > > > }
> > > >
> > > > Plus, I didn't have to remember how many bits were in each field.
> > > > I just thought it made everything simpler to understand.
> Suggestions?
> > >
> > > hmm, in this particular case, you could change the definition of
> > > VOP_DESC_F_NEXT to
> > >
> > > #define VOP_DESC_F_NEXT cpu_to_le16(1)
> > >
> > > and then do the code as the even simpler (source and object code
> > > wise)
> > >
> > > if (vq->desc[idx].flags) & VOP_DESC_F_NEXT)
> > >
> > > I'm not sure if you can do something along these lines for the other
> > > cases as well though.
> > >
> >
> > That's a good idea. It wouldn't fix the addresses, lengths, and next
> fields, though. I'll make the change and see how bad it is, then report
> back. It may not be so bad after all.
> >
> > > > I used 3 so they would would align to 1024 byte boundaries within
> > > > a 4K page. Then the layout was 16K on the bus, each 4K page is a
> > > > single virtio-device, and each 1K block is a single virtqueue. The
> > > > first 1K is for virtio-device status and feature bits, etc.
> > > >
> > > > Packing them differently isn't a problem. It was just easier to
> > > > code because setting up a window with the correct size is so
> > > > platform specific.
> > >
> > > Ok. I guess the important question is what part of the code makes
> > > this decision. Ideally, the virtio-net glue would instantiate the
> > > device with the right number of queues.
> > >
> >
> > Yeah, virtio doesn't work that way.
> >
> > The virtio drivers just call find_vq() with a different index for each
> queue they want to use. You have no way of knowing how many queues each
> virtio driver will want, unless you go read their source code.
> >
> > virtio-net currently uses 3 queues, but we only support the first two.
> > The third is optional (for now...), and non-symmetric.
> >
> > Thanks again,
> > Ira
> > --
> > To unsubscribe from this list: send the line "unsubscribe
> > linux-kernel" in the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> >
> >
> >
> >



^ permalink raw reply

* [PATCHv5 2/2] tg3: Allow ethtool to enable/disable loopback.
From: Mahesh Bandewar @ 2011-05-07  6:18 UTC (permalink / raw)
  To: Matt Carlson, David Miller
  Cc: netdev, Michael Chan, Tom Herbert, Michał Mirosław,
	Mahesh Bandewar
In-Reply-To: <1304559247-16111-1-git-send-email-maheshb@google.com>

This patch adds tg3_set_features() to handle loopback mode. Currently the
capability is added for the devices which support internal MAC loopback mode.
So when enabled, it enables internal-MAC loopback.

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
---
Changes since v4:
 (a) Added TG3_FLAG_LOOPBACK_ENABLED flag to keep loopback state in driver's 
     private data-structure.
 (b) Corrected the loopback implementation by using tp->mac_mode.
 (c) Forced Link up when in loopback mode. 

Changes since v3:
 (a) Corrected the condition (|| => &&) where loopback capability is added.
 (b) set_features() always returns 0.
 (c) Clear the loopback bit in ndo_open callback to avoid discrepancy.

Changes since v2:
 Implemented Joe Perches's style change.

Changes since v1:
 Implemented Matt Carlson's suggestions.
  (a) Enable this capability on the devices which are capable of MAC-loopback
      mode.
  (b) check if the device is running before making changes.
  (c) check bits before making changes.


 drivers/net/tg3.c |   78 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/tg3.h |    1 +
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 7c7c9a8..b7270c2 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3373,8 +3373,8 @@ relink:
 		tg3_phy_copper_begin(tp);
 
 		tg3_readphy(tp, MII_BMSR, &bmsr);
-		if (!tg3_readphy(tp, MII_BMSR, &bmsr) &&
-		    (bmsr & BMSR_LSTATUS))
+		if ((!tg3_readphy(tp, MII_BMSR, &bmsr) && (bmsr & BMSR_LSTATUS)) ||
+		    (tp->mac_mode & MAC_MODE_PORT_INT_LPBACK))
 			current_link_up = 1;
 	}
 
@@ -6309,6 +6309,43 @@ dma_error:
 	return NETDEV_TX_OK;
 }
 
+static void tg3_set_loopback(struct net_device *dev)
+{
+	struct tg3 *tp = netdev_priv(dev);
+
+	if (tg3_flag(tp, LOOPBACK_ENABLED)) {
+		if (tp->mac_mode & MAC_MODE_PORT_INT_LPBACK)
+			return;
+
+		/*
+		 * Clear MAC_MODE_HALF_DUPLEX or you won't get packets back in
+		 * loopback mode if Half-Duplex mode was negotiated earlier.
+		 */
+		tp->mac_mode &= ~MAC_MODE_HALF_DUPLEX;
+
+		/* Enable internal MAC loopback mode */
+		tp->mac_mode |= MAC_MODE_PORT_INT_LPBACK;
+		spin_lock_bh(&tp->lock);
+		tw32(MAC_MODE, tp->mac_mode);
+		netif_carrier_on(tp->dev);
+		spin_unlock_bh(&tp->lock);
+		netdev_info(dev, "Internal MAC loopback mode enabled.\n");
+	} else {
+		if (!(tp->mac_mode & MAC_MODE_PORT_INT_LPBACK))
+			return;
+
+		/* Disable internal MAC loopback mode */
+		tp->mac_mode &= ~MAC_MODE_PORT_INT_LPBACK;
+		spin_lock_bh(&tp->lock);
+		tw32(MAC_MODE, tp->mac_mode);
+		/* Force link status check */
+		tg3_setup_phy(tp, 1);
+		spin_unlock_bh(&tp->lock);
+		netdev_info(dev, "Internal MAC loopback mode disabled.\n");
+	}
+
+}
+
 static u32 tg3_fix_features(struct net_device *dev, u32 features)
 {
 	struct tg3 *tp = netdev_priv(dev);
@@ -6319,6 +6356,24 @@ static u32 tg3_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
+static int tg3_set_features(struct net_device *dev, u32 features)
+{
+	struct tg3 *tp = netdev_priv(dev);
+	u32 changed = dev->features ^ features;
+
+	if (changed & NETIF_F_LOOPBACK) {
+		if (tg3_flag(tp, LOOPBACK_ENABLED))
+			tg3_flag_clear(tp, LOOPBACK_ENABLED);
+		else
+			tg3_flag_set(tp, LOOPBACK_ENABLED);
+
+		if (netif_running(dev))
+			tg3_set_loopback(dev);
+	}
+
+	return 0;
+}
+
 static inline void tg3_set_mtu(struct net_device *dev, struct tg3 *tp,
 			       int new_mtu)
 {
@@ -9485,6 +9540,13 @@ static int tg3_open(struct net_device *dev)
 
 	netif_tx_start_all_queues(dev);
 
+	/*
+	 * Reset loopback feature if it was turned on while the device was down
+	 * make sure that it's installed properly now.
+	 */
+	if (tg3_flag(tp, LOOPBACK_ENABLED))
+		tg3_set_loopback(dev);
+
 	return 0;
 
 err_out3:
@@ -15029,6 +15091,7 @@ static const struct net_device_ops tg3_netdev_ops = {
 	.ndo_tx_timeout		= tg3_tx_timeout,
 	.ndo_change_mtu		= tg3_change_mtu,
 	.ndo_fix_features	= tg3_fix_features,
+	.ndo_set_features	= tg3_set_features,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= tg3_poll_controller,
 #endif
@@ -15045,6 +15108,7 @@ static const struct net_device_ops tg3_netdev_ops_dma_bug = {
 	.ndo_do_ioctl		= tg3_ioctl,
 	.ndo_tx_timeout		= tg3_tx_timeout,
 	.ndo_change_mtu		= tg3_change_mtu,
+	.ndo_set_features	= tg3_set_features,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= tg3_poll_controller,
 #endif
@@ -15242,6 +15306,16 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	dev->features |= hw_features;
 	dev->vlan_features |= hw_features;
 
+	/*
+	 * Add loopback capability only for a subset of devices that support
+	 * MAC-LOOPBACK. Eventually this need to be enhanced to allow INT-PHY
+	 * loopback for the remaining devices.
+	 */
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5780 &&
+	    !tg3_flag(tp, CPMU_PRESENT))
+		/* Add the loopback capability */
+		dev->hw_features |= NETIF_F_LOOPBACK;
+
 	if (tp->pci_chip_rev_id == CHIPREV_ID_5705_A1 &&
 	    !tg3_flag(tp, TSO_CAPABLE) &&
 	    !(tr32(TG3PCI_PCISTATE) & PCISTATE_BUS_SPEED_HIGH)) {
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index ce010cd3..d087ef0 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2891,6 +2891,7 @@ enum TG3_FLAGS {
 	TG3_FLAG_57765_PLUS,
 	TG3_FLAG_APE_HAS_NCSI,
 	TG3_FLAG_5717_PLUS,
+	TG3_FLAG_LOOPBACK_ENABLED,
 
 	/* Add new flags before this comment and TG3_FLAG_NUMBER_OF_FLAGS */
 	TG3_FLAG_NUMBER_OF_FLAGS,	/* Last entry in enum TG3_FLAGS */
-- 
1.7.3.1


^ permalink raw reply related

* Re: [PATCH 0/7] Network namespace manipulation with file descriptors
From: Alex Bligh @ 2011-05-07  6:58 UTC (permalink / raw)
  To: Eric W. Biederman, linux-arch-u79uwXL29TY76Z2rM5mHXA
  Cc: Linux Containers, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <m1tyd7p7tq.fsf-+imSwln9KH6u2/kzUuoCbdi2O/JbrIOy@public.gmane.org>



--On 6 May 2011 19:23:29 -0700 "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> 
wrote:

> This patchset addresses the user interface limitations by introducing
> proc files you can open to get file descriptors that keep alive and
> refer to your a tasks namespaces.  Those file descriptors can be passed
> to the new setns system call or the NET_NS_FD argument in netlink
> messages.

This is conceptually very interesting. I am one of those people you
describe with a routing daemon (or more accurately a wrapper around
existing daemons) that does the unshare() and keeps the network
alive. It also has a control socket etc.

You say:
> This addresses three specific problems that can make namespaces hard to
> work with.
> - Namespaces require a dedicated process to pin them in memory.
> - It is not possible to use a namespace unless you are the child
>   of the original creator.
> - Namespaces don't have names that userspace can use to talk about
>   them.

At least for me, the best way to solve the second blob would be to
be able to unshare to an existing namespace. That way I would be able
to run a daemon (without modification) in a pre-existing namespace.
The user interface here would just be an option to 'unshare'. I
don't think your patch allows this, does it? Right now I'm effectively
doing that by causing the pid concerned to fork() and do my bidding,
but that is far from perfect.

As a secondary issue, even without your patch, it would be really
useful to be able to read from userspace the current network namespace.
(i.e. the pid concerned, or 1 if not unshared). I would like to
simply modify a routing daemon's init script so it doesn't start
if in the host, e.g. at the top:
  [ `cat /proc/.../networknamespace` -eq 1 ] && exit 0

-- 
Alex Bligh

^ permalink raw reply

* Re: [PATCH 0/4] [RFC] virtio-net: Improve small packet performance
From: Krishna Kumar2 @ 2011-05-07  7:15 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: davem, eric.dumazet, kvm, netdev, rusty
In-Reply-To: <20110505153413.GC1915@redhat.com>

"Michael S. Tsirkin" <mst@redhat.com> wrote on 05/05/2011 09:04:13 PM:

> > I haven't tuned the threshhold, it is left it at 3/4. I ran
> > the new qemu/vhost/guest, and the results for 1K, 2K and 16K
> > are below. Note this is a different kernel version from my
> > earlier test results. So, f.e., BW1 represents 2.6.39-rc2,
> > the original kernel; while BW2 represents 2.6.37-rc5 (MST's
> > kernel).
>
> Weird. My kernel is actually 2.6.39-rc2. So which is which?

I cloned git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git

# git branch -a
  vhost
* vhost-net-next-event-idx-v1
  remotes/origin/HEAD -> origin/vhost
  remotes/origin/for-linus
  remotes/origin/master
  remotes/origin/net-2.6
  remotes/origin/vhost
  remotes/origin/vhost-broken
  remotes/origin/vhost-devel
  remotes/origin/vhost-mrg-rxbuf
  remotes/origin/vhost-net
  remotes/origin/vhost-net-next
  remotes/origin/vhost-net-next-event-idx-v1
  remotes/origin/vhost-net-next-rebased
  remotes/origin/virtio-layout-aligned
  remotes/origin/virtio-layout-minimal
  remotes/origin/virtio-layout-original
  remotes/origin/virtio-layout-padded
  remotes/origin/virtio-publish-used

# git checkout vhost-net-next-event-idx-v1
Already on 'vhost-net-next-event-idx-v1'

# head -4 Makefile
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 37
EXTRAVERSION = -rc5

I am not sure what I am missing.

thanks,

- KK


^ permalink raw reply

* Re: [PATCH] bonding: convert to ndo_fix_features
From: Michał Mirosław @ 2011-05-07  7:37 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: netdev, Andy Gospodarek
In-Reply-To: <5525.1304705884@death>

On Fri, May 06, 2011 at 11:18:04AM -0700, Jay Vosburgh wrote:
> Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> >This should also fix updating of vlan_features and propagating changes to
> >VLAN devices on the bond.
> >
> >Side effect: it allows user to force-disable some offloads on the bond
> >interface.
> >
> >Note: NETIF_F_VLAN_CHALLENGED is managed by bond_fix_features() now.
> >
> >BTW, What are the problems in creating VLAN devices on an empty bond
> >(as stated in one of bond_setup() comments)?
> 	If there are no slaves, then the bond does not have a MAC
> address assigned (because it gets its initial MAC from the first slave).
> It's therefore impossible to pass a MAC address up to the VLAN
> interface.
> 
> 	So the limitation is that the bond must have at least one slave
> before a VLAN may be configured above it.

Hmm. That might be worked around by generating a random MAC then. This would
allow the user to first set a new MAC, create VLANs and then add slaves
when they show up.

Best Regards,
Michał Mirosław

^ permalink raw reply

* Re: [PATCHv5 2/2] tg3: Allow ethtool to enable/disable loopback.
From: Michał Mirosław @ 2011-05-07  7:43 UTC (permalink / raw)
  To: Mahesh Bandewar
  Cc: Matt Carlson, David Miller, netdev, Michael Chan, Tom Herbert
In-Reply-To: <1304749117-1989-1-git-send-email-maheshb@google.com>

On Fri, May 06, 2011 at 11:18:37PM -0700, Mahesh Bandewar wrote:
> This patch adds tg3_set_features() to handle loopback mode. Currently the
> capability is added for the devices which support internal MAC loopback mode.
> So when enabled, it enables internal-MAC loopback.
[...]
> diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
> index 7c7c9a8..b7270c2 100644
> --- a/drivers/net/tg3.c
> +++ b/drivers/net/tg3.c
[...]
> @@ -6319,6 +6356,24 @@ static u32 tg3_fix_features(struct net_device *dev, u32 features)
>  	return features;
>  }
>  
> +static int tg3_set_features(struct net_device *dev, u32 features)
> +{
> +	struct tg3 *tp = netdev_priv(dev);
> +	u32 changed = dev->features ^ features;
> +
> +	if (changed & NETIF_F_LOOPBACK) {
> +		if (tg3_flag(tp, LOOPBACK_ENABLED))
> +			tg3_flag_clear(tp, LOOPBACK_ENABLED);
> +		else
> +			tg3_flag_set(tp, LOOPBACK_ENABLED);
> +
> +		if (netif_running(dev))
> +			tg3_set_loopback(dev);
> +	}
> +
> +	return 0;
> +}
> +
>  static inline void tg3_set_mtu(struct net_device *dev, struct tg3 *tp,
>  			       int new_mtu)
>  {
> @@ -9485,6 +9540,13 @@ static int tg3_open(struct net_device *dev)
>  
>  	netif_tx_start_all_queues(dev);
>  
> +	/*
> +	 * Reset loopback feature if it was turned on while the device was down
> +	 * make sure that it's installed properly now.
> +	 */
> +	if (tg3_flag(tp, LOOPBACK_ENABLED))
> +		tg3_set_loopback(dev);
> +
>  	return 0;
>  
>  err_out3:
[...]

So, you've just implemented what I said about enabling loopback at the end
of tg3_open(), but you also added (redundant) flag that mirrors
dev->features & NETIF_F_LOOPBACK. Why?

Best Regards,
Michał Mirosław

^ permalink raw reply

* Re: [PATCH] hamachi: Delete TX checksumming code commented out since 1999
From: Michał Mirosław @ 2011-05-07  7:59 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20110506.115945.104066424.davem@davemloft.net>

2011/5/6 David Miller <davem@davemloft.net>:
>
> TX checksumming support has been ifdef commented out of this driver
> for more than 10 years, and it makes references to aspects of the IPv4
> stack from back then as well.
>
> If someone has one of these rare cards and wants to properly resurrect
> TX checksumming support, they can still get at this code in the
> version control history.
>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> ---
>
> I stumbled over this cruft while auditing ip_queue_xmit() users.
>
>  drivers/net/hamachi.c |   79 -------------------------------------------------
>  1 files changed, 0 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
> index 80d25ed..f5fba73 100644
> --- a/drivers/net/hamachi.c
> +++ b/drivers/net/hamachi.c
> @@ -132,14 +132,8 @@ static int tx_params[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
>  /*
>  * RX_CHECKSUM turns on card-generated receive checksum generation for
>  *   TCP and UDP packets.  Otherwise the upper layers do the calculation.
> - * TX_CHECKSUM won't do anything too useful, even if it works.  There's no
> - *   easy mechanism by which to tell the TCP/UDP stack that it need not
> - *   generate checksums for this device.  But if somebody can find a way
> - *   to get that to work, most of the card work is in here already.
>  * 3/10/1999 Pete Wyckoff <wyckoff@ca.sandia.gov>
>  */
> -#undef  TX_CHECKSUM
> -#define RX_CHECKSUM

This also disables RX checksumming. You wanted to leave '#define
RX_CHECKSUM' in place?

Best Regards,
Michał Mirosław

^ permalink raw reply

* Re: [PATCH 2/7] ns: Introduce the setns syscall
From: Rémi Denis-Courmont @ 2011-05-07  8:01 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-arch-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1304735101-1824-2-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Le samedi 7 mai 2011 05:24:56 Eric W. Biederman, vous avez écrit :
> Pieces of this puzzle can also be solved by instead of
> coming up with a general purpose system call coming up
> with targed system calls perhaps socketat that solve
> a subset of the larger problem.  Overall that appears
> to be more work for less reward.

socketat() is still required for multithreaded namespace-aware userspace, I 
believe.

-- 
Rémi Denis-Courmont
http://www.remlab.info/
http://fi.linkedin.com/in/remidenis

^ permalink raw reply

* Re: [PATCH] hamachi: Delete TX checksumming code commented out since 1999
From: David Miller @ 2011-05-07  8:02 UTC (permalink / raw)
  To: mirqus; +Cc: netdev
In-Reply-To: <BANLkTi=ZeY6X1XOdK2UXsODNBqzbbG+4KQ@mail.gmail.com>

From: Michał Mirosław <mirqus@gmail.com>
Date: Sat, 7 May 2011 09:59:28 +0200

> This also disables RX checksumming. You wanted to leave '#define
> RX_CHECKSUM' in place?

Oops, I'll fix that, thanks.

^ permalink raw reply

* Re: [PATCH 7/7] ns: Wire up the setns system call
From: Geert Uytterhoeven @ 2011-05-07  8:27 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-arch, linux-kernel, netdev, linux-fsdevel, jamal,
	Daniel Lezcano, Linux Containers, Renato Westphal
In-Reply-To: <1304735101-1824-7-git-send-email-ebiederm@xmission.com>

On Sat, May 7, 2011 at 04:25, Eric W. Biederman <ebiederm@xmission.com> wrote:
>  arch/m68k/include/asm/unistd.h         |    3 ++-
>  arch/m68k/kernel/syscalltable.S        |    1 +

As the unified syscalltable for m68k/m68knommu is not yet in mainline
(planned for 2.6.40), you should also add it to arch/m68k/kernel/entry_mm.S.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [Bugme-new] [Bug 34322] New: No ECN marking in IPv6
From: Eric Dumazet @ 2011-05-07  9:44 UTC (permalink / raw)
  To: Steinar H. Gunderson, David Miller
  Cc: Andrew Morton, netdev, bugzilla-daemon, bugme-daemon,
	YOSHIFUJI Hideaki
In-Reply-To: <20110506171249.GA29942@uio.no>

From: Steinar H. Gunderson <sgunderson@bigfoot.com>

Le vendredi 06 mai 2011 à 19:12 +0200, Steinar H. Gunderson a écrit :

> Sure, but is really checking against NULL the right way of checking for IPv6
> sockets? I'd imagined I should have checked address family or something
> instead...
> 

It should be fine.

I cooked for you the official patch and made sure it worked with a RED
ECN setup, and one ipv6 tcp xmit.

# tc -s -d qdisc show dev eth1
...
qdisc red 11: parent 1:11 limit 120Kb min 8Kb max 80Kb ecn ewma 2 Plog 21 Scell_log 11
 Sent 114694826 bytes 76446 pkt (dropped 15, overlimits 485 requeues 0) 
 rate 12126Kbit 1011pps backlog 0b 0p requeues 0 
  marked 470 early 15 pdrop 0 other 0


Thanks again !

[PATCH] ipv6: restore correct ECN handling on TCP xmit

Since commit e9df2e8fd8fbc9 (Use appropriate sock tclass setting for
routing lookup) we lost ability to properly add ECN codemarks to ipv6
TCP frames.

It seems like TCP_ECN_send() calls INET_ECN_xmit(), which only sets the
ECN bit in the IPv4 ToS field (inet_sk(sk)->tos), but after the patch,
what's checked is inet6_sk(sk)->tclass, which is a completely different
field.

Close bug https://bugzilla.kernel.org/show_bug.cgi?id=34322

[Eric Dumazet] : added the INET_ECN_dontxmit() fix and replaced macros
with inline functions for clarity.

Signed-off-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/net/inet_ecn.h |   16 +++++++++++++---
 1 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 88bdd01..2fa8d13 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -38,9 +38,19 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
 	return outer;
 }
 
-#define	INET_ECN_xmit(sk) do { inet_sk(sk)->tos |= INET_ECN_ECT_0; } while (0)
-#define	INET_ECN_dontxmit(sk) \
-	do { inet_sk(sk)->tos &= ~INET_ECN_MASK; } while (0)
+static inline void INET_ECN_xmit(struct sock *sk)
+{
+	inet_sk(sk)->tos |= INET_ECN_ECT_0;
+	if (inet6_sk(sk) != NULL)
+		inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+}
+
+static inline void INET_ECN_dontxmit(struct sock *sk)
+{
+	inet_sk(sk)->tos &= ~INET_ECN_MASK;
+	if (inet6_sk(sk) != NULL)
+		inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
+}
 
 #define IP6_ECN_flow_init(label) do {		\
       (label) &= ~htonl(INET_ECN_MASK << 20);	\



^ permalink raw reply related

* Re: [Bugme-new] [Bug 34322] New: No ECN marking in IPv6
From: Steinar H. Gunderson @ 2011-05-07  9:59 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Andrew Morton, netdev, bugzilla-daemon,
	bugme-daemon, YOSHIFUJI Hideaki
In-Reply-To: <1304761486.2821.945.camel@edumazet-laptop>

On Sat, May 07, 2011 at 11:44:46AM +0200, Eric Dumazet wrote:
> I cooked for you the official patch and made sure it worked with a RED
> ECN setup, and one ipv6 tcp xmit.

Great, thanks :-) This looks good to me.

/* Steinar */
-- 
Homepage: http://www.sesse.net/

^ permalink raw reply

* [net-next-2.6 0/5][pull request] Intel Wired LAN Driver Update
From: Jeff Kirsher @ 2011-05-07 10:25 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, bphilips

The following series contains updates to e100, e1000, igb and ixgbe.

Sorry for the delay on the e100/e1000/igb convert to set_phys_id patches,
it was due to me falling ill and not completing the patches in a timely
manner.

The following are changes since commit 706527280ec38fcdcd0466f10b607105fd23801b:
  ipv4: Initialize cork->opt using NULL not 0
and are available in the git repository at:
  master.kernel.org:/pub/scm/linux/kernel/git/jkirsher/net-next-2.6 master

Emil Tantilov (2):
  ixgbe: add rxhash support
  ixgbe: add ethtool counters for OS2BMC

Jeff Kirsher (3):
  e100: implement set_phys_id
  e1000: convert to set_phys_id
  igb: convert to ethtool set_phys_id

 drivers/net/e100.c                |   66 +++++++++++++++---------------------
 drivers/net/e1000/e1000.h         |    3 --
 drivers/net/e1000/e1000_ethtool.c |   50 +++++++++-------------------
 drivers/net/igb/igb_ethtool.c     |   37 ++++++++++----------
 drivers/net/ixgbe/ixgbe_ethtool.c |   11 +++++-
 drivers/net/ixgbe/ixgbe_main.c    |   21 ++++++++++-
 drivers/net/ixgbe/ixgbe_type.h    |    8 ++++
 7 files changed, 100 insertions(+), 96 deletions(-)

-- 
1.7.4.4


^ permalink raw reply

* [net-next-2.6 1/5] e100: implement set_phys_id
From: Jeff Kirsher @ 2011-05-07 10:25 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, bphilips, Stephen Hemminger
In-Reply-To: <1304763923-6839-1-git-send-email-jeffrey.t.kirsher@intel.com>

Based on the original patch from Stephen Hemminger.
Implement set_phys_id to control LED.

CC: Stephen Hemminger <shemminger@vyatta.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e100.c |   66 ++++++++++++++++++++++------------------------------
 1 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 29f812d..e336c79 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -593,7 +593,6 @@ struct nic {
 	enum phy phy;
 	struct params params;
 	struct timer_list watchdog;
-	struct timer_list blink_timer;
 	struct mii_if_info mii;
 	struct work_struct tx_timeout_task;
 	enum loopback loopback;
@@ -618,7 +617,6 @@ struct nic {
 	u32 rx_tco_frames;
 	u32 rx_over_length_errors;
 
-	u16 leds;
 	u16 eeprom_wc;
 	__le16 eeprom[256];
 	spinlock_t mdio_lock;
@@ -2353,30 +2351,6 @@ err_clean_rx:
 #define E100_82552_LED_OVERRIDE 0x19
 #define E100_82552_LED_ON       0x000F /* LEDTX and LED_RX both on */
 #define E100_82552_LED_OFF      0x000A /* LEDTX and LED_RX both off */
-static void e100_blink_led(unsigned long data)
-{
-	struct nic *nic = (struct nic *)data;
-	enum led_state {
-		led_on     = 0x01,
-		led_off    = 0x04,
-		led_on_559 = 0x05,
-		led_on_557 = 0x07,
-	};
-	u16 led_reg = MII_LED_CONTROL;
-
-	if (nic->phy == phy_82552_v) {
-		led_reg = E100_82552_LED_OVERRIDE;
-
-		nic->leds = (nic->leds == E100_82552_LED_ON) ?
-		            E100_82552_LED_OFF : E100_82552_LED_ON;
-	} else {
-		nic->leds = (nic->leds & led_on) ? led_off :
-		            (nic->mac < mac_82559_D101M) ? led_on_557 :
-		            led_on_559;
-	}
-	mdio_write(nic->netdev, nic->mii.phy_id, led_reg, nic->leds);
-	mod_timer(&nic->blink_timer, jiffies + HZ / 4);
-}
 
 static int e100_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
 {
@@ -2600,19 +2574,38 @@ static void e100_diag_test(struct net_device *netdev,
 	msleep_interruptible(4 * 1000);
 }
 
-static int e100_phys_id(struct net_device *netdev, u32 data)
+static int e100_set_phys_id(struct net_device *netdev,
+			    enum ethtool_phys_id_state state)
 {
 	struct nic *nic = netdev_priv(netdev);
+	enum led_state {
+		led_on     = 0x01,
+		led_off    = 0x04,
+		led_on_559 = 0x05,
+		led_on_557 = 0x07,
+	};
 	u16 led_reg = (nic->phy == phy_82552_v) ? E100_82552_LED_OVERRIDE :
-	              MII_LED_CONTROL;
+		MII_LED_CONTROL;
+	u16 leds = 0;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		return 2;
 
-	if (!data || data > (u32)(MAX_SCHEDULE_TIMEOUT / HZ))
-		data = (u32)(MAX_SCHEDULE_TIMEOUT / HZ);
-	mod_timer(&nic->blink_timer, jiffies);
-	msleep_interruptible(data * 1000);
-	del_timer_sync(&nic->blink_timer);
-	mdio_write(netdev, nic->mii.phy_id, led_reg, 0);
+	case ETHTOOL_ID_ON:
+		leds = (nic->phy == phy_82552_v) ? E100_82552_LED_ON :
+		       (nic->mac < mac_82559_D101M) ? led_on_557 : led_on_559;
+		break;
+
+	case ETHTOOL_ID_OFF:
+		leds = (nic->phy == phy_82552_v) ? E100_82552_LED_OFF : led_off;
+		break;
+
+	case ETHTOOL_ID_INACTIVE:
+		break;
+	}
 
+	mdio_write(netdev, nic->mii.phy_id, led_reg, leds);
 	return 0;
 }
 
@@ -2693,7 +2686,7 @@ static const struct ethtool_ops e100_ethtool_ops = {
 	.set_ringparam		= e100_set_ringparam,
 	.self_test		= e100_diag_test,
 	.get_strings		= e100_get_strings,
-	.phys_id		= e100_phys_id,
+	.set_phys_id		= e100_set_phys_id,
 	.get_ethtool_stats	= e100_get_ethtool_stats,
 	.get_sset_count		= e100_get_sset_count,
 };
@@ -2834,9 +2827,6 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	init_timer(&nic->watchdog);
 	nic->watchdog.function = e100_watchdog;
 	nic->watchdog.data = (unsigned long)nic;
-	init_timer(&nic->blink_timer);
-	nic->blink_timer.function = e100_blink_led;
-	nic->blink_timer.data = (unsigned long)nic;
 
 	INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);
 
-- 
1.7.4.4


^ permalink raw reply related

* [net-next-2.6 2/5] e1000: convert to set_phys_id
From: Jeff Kirsher @ 2011-05-07 10:25 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, bphilips, Stephen Hemminger
In-Reply-To: <1304763923-6839-1-git-send-email-jeffrey.t.kirsher@intel.com>

Based on the original patch from Stephen Hemminger.
Convert to new LED control infrastructure and remove no longer
necessary bits.

CC: Stephen Hemminger <shemminger@vyatta.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000/e1000.h         |    3 --
 drivers/net/e1000/e1000_ethtool.c |   50 ++++++++++++-------------------------
 2 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h
index b1b23dd..8676899 100644
--- a/drivers/net/e1000/e1000.h
+++ b/drivers/net/e1000/e1000.h
@@ -238,9 +238,6 @@ struct e1000_adapter {
 	struct work_struct reset_task;
 	u8 fc_autoneg;
 
-	struct timer_list blink_timer;
-	unsigned long led_status;
-
 	/* TX */
 	struct e1000_tx_ring *tx_ring;      /* One per active queue */
 	unsigned int restart_queue;
diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c
index 4fa727c..ec0fa42 100644
--- a/drivers/net/e1000/e1000_ethtool.c
+++ b/drivers/net/e1000/e1000_ethtool.c
@@ -1755,46 +1755,28 @@ static int e1000_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 	return 0;
 }
 
-/* toggle LED 4 times per second = 2 "blinks" per second */
-#define E1000_ID_INTERVAL	(HZ/4)
-
-/* bit defines for adapter->led_status */
-#define E1000_LED_ON		0
-
-static void e1000_led_blink_callback(unsigned long data)
+static int e1000_set_phys_id(struct net_device *netdev,
+			     enum ethtool_phys_id_state state)
 {
-	struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 
-	if (test_and_change_bit(E1000_LED_ON, &adapter->led_status))
-		e1000_led_off(hw);
-	else
-		e1000_led_on(hw);
-
-	mod_timer(&adapter->blink_timer, jiffies + E1000_ID_INTERVAL);
-}
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		e1000_setup_led(hw);
+		return 2;
 
-static int e1000_phys_id(struct net_device *netdev, u32 data)
-{
-	struct e1000_adapter *adapter = netdev_priv(netdev);
-	struct e1000_hw *hw = &adapter->hw;
+	case ETHTOOL_ID_ON:
+		e1000_led_on(hw);
+		break;
 
-	if (!data)
-		data = INT_MAX;
+	case ETHTOOL_ID_OFF:
+		e1000_led_off(hw);
+		break;
 
-	if (!adapter->blink_timer.function) {
-		init_timer(&adapter->blink_timer);
-		adapter->blink_timer.function = e1000_led_blink_callback;
-		adapter->blink_timer.data = (unsigned long)adapter;
+	case ETHTOOL_ID_INACTIVE:
+		e1000_cleanup_led(hw);
 	}
-	e1000_setup_led(hw);
-	mod_timer(&adapter->blink_timer, jiffies);
-	msleep_interruptible(data * 1000);
-	del_timer_sync(&adapter->blink_timer);
-
-	e1000_led_off(hw);
-	clear_bit(E1000_LED_ON, &adapter->led_status);
-	e1000_cleanup_led(hw);
 
 	return 0;
 }
@@ -1931,7 +1913,7 @@ static const struct ethtool_ops e1000_ethtool_ops = {
 	.set_tso                = e1000_set_tso,
 	.self_test              = e1000_diag_test,
 	.get_strings            = e1000_get_strings,
-	.phys_id                = e1000_phys_id,
+	.set_phys_id            = e1000_set_phys_id,
 	.get_ethtool_stats      = e1000_get_ethtool_stats,
 	.get_sset_count         = e1000_get_sset_count,
 	.get_coalesce           = e1000_get_coalesce,
-- 
1.7.4.4


^ permalink raw reply related

* [net-next-2.6 3/5] igb: convert to ethtool set_phys_id
From: Jeff Kirsher @ 2011-05-07 10:25 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, bphilips, Stephen Hemminger
In-Reply-To: <1304763923-6839-1-git-send-email-jeffrey.t.kirsher@intel.com>

Based on patch from Stephen Hemminger.
Convert igb driver to use new set_phys_id ethtool interface.

CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/igb/igb_ethtool.c |   37 +++++++++++++++++++------------------
 1 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c
index 6e29634..fdc895e 100644
--- a/drivers/net/igb/igb_ethtool.c
+++ b/drivers/net/igb/igb_ethtool.c
@@ -1964,27 +1964,28 @@ static int igb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 /* bit defines for adapter->led_status */
 #define IGB_LED_ON		0
 
-static int igb_phys_id(struct net_device *netdev, u32 data)
+static int igb_set_phys_id(struct net_device *netdev,
+			   enum ethtool_phys_id_state state)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	unsigned long timeout;
 
-	timeout = data * 1000;
-
-	/*
-	 *  msleep_interruptable only accepts unsigned int so we are limited
-	 * in how long a duration we can wait
-	 */
-	if (!timeout || timeout > UINT_MAX)
-		timeout = UINT_MAX;
-
-	igb_blink_led(hw);
-	msleep_interruptible(timeout);
-
-	igb_led_off(hw);
-	clear_bit(IGB_LED_ON, &adapter->led_status);
-	igb_cleanup_led(hw);
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		igb_blink_led(hw);
+		return 2;
+	case ETHTOOL_ID_ON:
+		igb_blink_led(hw);
+		break;
+	case ETHTOOL_ID_OFF:
+		igb_led_off(hw);
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		igb_led_off(hw);
+		clear_bit(IGB_LED_ON, &adapter->led_status);
+		igb_cleanup_led(hw);
+		break;
+	}
 
 	return 0;
 }
@@ -2216,7 +2217,7 @@ static const struct ethtool_ops igb_ethtool_ops = {
 	.set_tso                = igb_set_tso,
 	.self_test              = igb_diag_test,
 	.get_strings            = igb_get_strings,
-	.phys_id                = igb_phys_id,
+	.set_phys_id            = igb_set_phys_id,
 	.get_sset_count         = igb_get_sset_count,
 	.get_ethtool_stats      = igb_get_ethtool_stats,
 	.get_coalesce           = igb_get_coalesce,
-- 
1.7.4.4


^ permalink raw reply related

* [net-next-2.6 4/5] ixgbe: add rxhash support
From: Jeff Kirsher @ 2011-05-07 10:25 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1304763923-6839-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

feed RSS hash into skb->rxhash

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |    7 ++++++-
 drivers/net/ixgbe/ixgbe_main.c    |   14 +++++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index f2efa32..545b231 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2253,8 +2253,13 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 	need_reset = (data & ETH_FLAG_RXVLAN) !=
 		     (netdev->features & NETIF_F_HW_VLAN_RX);
 
+	if ((data & ETH_FLAG_RXHASH) &&
+	    !(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
+		return -EOPNOTSUPP;
+
 	rc = ethtool_op_set_flags(netdev, data, ETH_FLAG_LRO | ETH_FLAG_NTUPLE |
-					ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN);
+				  ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN |
+				  ETH_FLAG_RXHASH);
 	if (rc)
 		return rc;
 
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index eebb192..56cc9a1 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1063,8 +1063,14 @@ static int __ixgbe_notify_dca(struct device *dev, void *data)
 
 	return 0;
 }
-
 #endif /* CONFIG_IXGBE_DCA */
+
+static inline void ixgbe_rx_hash(union ixgbe_adv_rx_desc *rx_desc,
+				 struct sk_buff *skb)
+{
+	skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+}
+
 /**
  * ixgbe_receive_skb - Send a completed packet up the stack
  * @adapter: board private structure
@@ -1456,6 +1462,8 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		}
 
 		ixgbe_rx_checksum(adapter, rx_desc, skb);
+		if (adapter->netdev->features & NETIF_F_RXHASH)
+			ixgbe_rx_hash(rx_desc, skb);
 
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
@@ -7361,6 +7369,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	netdev->features |= NETIF_F_TSO;
 	netdev->features |= NETIF_F_TSO6;
 	netdev->features |= NETIF_F_GRO;
+	netdev->features |= NETIF_F_RXHASH;
 
 	switch (adapter->hw.mac.type) {
 	case ixgbe_mac_82599EB:
@@ -7441,6 +7450,9 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_sw_init;
 
+	if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
+		netdev->features &= ~NETIF_F_RXHASH;
+
 	switch (pdev->device) {
 	case IXGBE_DEV_ID_82599_SFP:
 		/* Only this subdevice supports WOL */
-- 
1.7.4.4


^ permalink raw reply related

* [net-next-2.6 5/5] ixgbe: add ethtool counters for OS2BMC
From: Jeff Kirsher @ 2011-05-07 10:25 UTC (permalink / raw)
  To: davem; +Cc: Emil Tantilov, netdev, gospo, bphilips, Jeff Kirsher
In-Reply-To: <1304763923-6839-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Emil Tantilov <emil.s.tantilov@intel.com>

OS2BMC registers are available for X540.
This patch adds ethtool counters based on those registers.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_ethtool.c |    4 ++++
 drivers/net/ixgbe/ixgbe_main.c    |    7 ++++++-
 drivers/net/ixgbe/ixgbe_type.h    |    8 ++++++++
 3 files changed, 18 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 545b231..1fdd075 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -102,6 +102,10 @@ static struct ixgbe_stats ixgbe_gstrings_stats[] = {
 	{"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)},
 	{"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)},
 	{"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)},
+	{"os2bmc_rx_by_bmc", IXGBE_STAT(stats.o2bgptc)},
+	{"os2bmc_tx_by_bmc", IXGBE_STAT(stats.b2ospc)},
+	{"os2bmc_tx_by_host", IXGBE_STAT(stats.o2bspc)},
+	{"os2bmc_rx_by_host", IXGBE_STAT(stats.b2ogprc)},
 #ifdef IXGBE_FCOE
 	{"fcoe_bad_fccrc", IXGBE_STAT(stats.fccrc)},
 	{"rx_fcoe_dropped", IXGBE_STAT(stats.fcoerpdc)},
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 56cc9a1..a3e384b 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -5912,8 +5912,13 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 		hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
 		hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH);
 		break;
-	case ixgbe_mac_82599EB:
 	case ixgbe_mac_X540:
+		/* OS2BMC stats are X540 only*/
+		hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC);
+		hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC);
+		hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC);
+		hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC);
+	case ixgbe_mac_82599EB:
 		hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL);
 		IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */
 		hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL);
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index b1d523c..70e6870 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -672,6 +672,10 @@
 #define IXGBE_FCOEDWRC  0x0242C /* Number of FCoE DWords Received */
 #define IXGBE_FCOEPTC   0x08784 /* Number of FCoE Packets Transmitted */
 #define IXGBE_FCOEDWTC  0x08788 /* Number of FCoE DWords Transmitted */
+#define IXGBE_O2BGPTC   0x041C4
+#define IXGBE_O2BSPC    0x087B0
+#define IXGBE_B2OSPC    0x041C0
+#define IXGBE_B2OGPRC   0x02F90
 #define IXGBE_PCRC8ECL  0x0E810
 #define IXGBE_PCRC8ECH  0x0E811
 #define IXGBE_PCRC8ECH_MASK     0x1F
@@ -2554,6 +2558,10 @@ struct ixgbe_hw_stats {
 	u64 fcoeptc;
 	u64 fcoedwrc;
 	u64 fcoedwtc;
+	u64 b2ospc;
+	u64 b2ogprc;
+	u64 o2bgptc;
+	u64 o2bspc;
 };
 
 /* forward declaration */
-- 
1.7.4.4


^ permalink raw reply related

* Scalability of interface creation and deletion
From: Alex Bligh @ 2011-05-07 11:08 UTC (permalink / raw)
  To: netdev; +Cc: Alex Bligh

I am trying to track down why interface creation slows down badly with
large numbers of interfaces (~1,000 interfaces) and why deletion is so
slow. Use case: restarting routers needs to be fast; some failover methods
require interface up/down; some routers need lots of interfaces.

I have written a small shell script to create and delete a number of
interfaces supplied on the command line (script appended below). It
is important to run this with udev, udev-bridge etc. disabled. In
my environment (Ubuntu 2.6.32-28-generic, Lucid), I did this by:
 * service upstart-udev-bridge stop
 * service udev stop
 * unshare -n bash
If you don't do this, you are simply timing your distro's interface
scripts.

Note the "-n" parameter creates the supplied number of veth pair
interfaces. As these are pairs, there are twice as many interfaces actually
created.

So, the results which are pretty repeatable are as follows:

                            100 pairs      500 pairs
Interface creation               14ms          110ms
Interface deletion              160ms          148ms

Now I don't think interface deletion has in fact got faster: simply
the overhead of loading the script is spread over more processes.
But there are two obvious conclusions:

1. Interface creation slows down hugely with more interfaces
2. Interface deletion is normally much slower than interface creation

strace -T -ttt on the "ip" command used to do this does not show the delay
where I thought it would be - cataloguing the existing interfaces. Instead,
it's the final send() to the netlink socket which does the relevant action
which appears to be slow, for both addition and deletion. Adding the last
interface takes 200ms in that syscall, the first is quick (symptomatic of a
slowdown); for deletion the last send syscall is quick.

Poking about in net/core/dev.c, I see that interface names are hashed using
a hash with a maximum of 256 entries. However, these seem to be hash
buckets supporting multiple entries so I can't imagine a chain of 4 entries
is problematic.

I am having difficulty seeing what might be the issue in interface
creation. Any ideas?

In interface deletion, my attention is drawn to netdev_wait_allrefs,
which does this:
        refcnt = netdev_refcnt_read(dev);

        while (refcnt != 0) {
                ...
                msleep(250);

                refcnt = netdev_refcnt_read(dev);
		....
        }

I am guessing that this is going to do the msleep 50% of the time,
explaining 125ms of the observed time. How would people react to
exponential backoff instead (untested):

	int backoff = 10;
        refcnt = netdev_refcnt_read(dev);

        while (refcnt != 0) {
                ...
                msleep(backoff);
                if ((backoff *= 2) > 250)
                  backoff = 250;
		
                refcnt = netdev_refcnt_read(dev);
		....
        }


-- 
Alex Bligh



#!/bin/bash

# Usage:
#   ifaceseq [options]
#
# Options:
#   -n NUM : use NUM interfaces
#   -t TYPE : use TYPE of interfaces (supported: veth, vlan)

# Defaults: ten interfaces of type veth unless overridden on the command line.
numifs=10
itype=veth

# -n NUM sets the interface count, -t TYPE sets the interface type.
# Any other value getopts places in $flag is not acted on by this loop.
while getopts n:t: flag; do
    if [ "${flag}" = n ]; then
	numifs=${OPTARG}
    elif [ "${flag}" = t ]; then
	itype=${OPTARG}
    fi
done

# Drop the already-parsed options from the positional parameters.
shift $((OPTIND-1))

# Create $numifs interfaces, printing a timestamp before and after.
#   itype=vlan     : VLAN devices vlan1..vlanN on top of eth0, VLAN id = index
#   anything else  : veth pairs testaN <-> testbN
createifs ()
{
    echo $(date) creating $numifs interfaces
    if [ "${itype}" = vlan ]; then
	for i in $(seq 1 $numifs) ; do
	    ip link add link eth0 name vlan${i} type vlan id ${i}
	done
    else
	for i in $(seq 1 $numifs) ; do
	    ip link add testa${i} type veth peer name testb${i}
	done
    fi
    echo $(date) done
}

# Delete the $numifs interfaces made by createifs, printing a timestamp
# before and after.
#   itype=vlan     : removes vlan1..vlanN
#   anything else  : removes testa1..testaN (only the testa end is named here)
deleteifs ()
{
    echo $(date) deleting $numifs interfaces
    if [ "${itype}" = vlan ]; then
	for i in $(seq 1 $numifs) ; do
	    ip link delete dev vlan${i}
	done
    else
	for i in $(seq 1 $numifs) ; do
	    ip link delete testa${i}
	done
    fi
    echo $(date) done
}

# Measure the creation pass, then the deletion pass, with the shell's
# `time` keyword so each whole function run is reported separately.
time createifs;
time deleteifs;




^ permalink raw reply

* [PATCH] net: bonding: factor out rlock(bond->lock) in xmit path
From: Michał Mirosław @ 2011-05-07 11:48 UTC (permalink / raw)
  To: netdev; +Cc: Jay Vosburgh, Andy Gospodarek

Pull read_lock(&bond->lock) and BOND_IS_OK() to bond_start_xmit() from
mode-dependent xmit functions.

netif_running() is always true in hard_start_xmit.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/bonding/bond_3ad.c  |   10 +-----
 drivers/net/bonding/bond_alb.c  |   11 +-----
 drivers/net/bonding/bond_main.c |   74 +++++++++++++++++----------------------
 3 files changed, 35 insertions(+), 60 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index d4160f8..c7537abc 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2403,14 +2403,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 	struct ad_info ad_info;
 	int res = 1;
 
-	/* make sure that the slaves list will
-	 * not change during tx
-	 */
-	read_lock(&bond->lock);
-
-	if (!BOND_IS_OK(bond))
-		goto out;
-
 	if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
 		pr_debug("%s: Error: bond_3ad_get_active_agg_info failed\n",
 			 dev->name);
@@ -2464,7 +2456,7 @@ out:
 		/* no suitable interface, frame not sent */
 		dev_kfree_skb(skb);
 	}
-	read_unlock(&bond->lock);
+
 	return NETDEV_TX_OK;
 }
 
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 3b7b040..8f2d2e7 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1225,16 +1225,10 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	skb_reset_mac_header(skb);
 	eth_data = eth_hdr(skb);
 
-	/* make sure that the curr_active_slave and the slaves list do
-	 * not change during tx
+	/* make sure that the curr_active_slave do not change during tx
 	 */
-	read_lock(&bond->lock);
 	read_lock(&bond->curr_slave_lock);
 
-	if (!BOND_IS_OK(bond)) {
-		goto out;
-	}
-
 	switch (ntohs(skb->protocol)) {
 	case ETH_P_IP: {
 		const struct iphdr *iph = ip_hdr(skb);
@@ -1334,13 +1328,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 		}
 	}
 
-out:
 	if (res) {
 		/* no suitable interface, frame not sent */
 		dev_kfree_skb(skb);
 	}
 	read_unlock(&bond->curr_slave_lock);
-	read_unlock(&bond->lock);
+
 	return NETDEV_TX_OK;
 }
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 04a2205..1f8902e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3975,10 +3975,6 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
 	int i, slave_no, res = 1;
 	struct iphdr *iph = ip_hdr(skb);
 
-	read_lock(&bond->lock);
-
-	if (!BOND_IS_OK(bond))
-		goto out;
 	/*
 	 * Start with the curr_active_slave that joined the bond as the
 	 * default for sending IGMP traffic.  For failover purposes one
@@ -4025,7 +4021,7 @@ out:
 		/* no suitable interface, frame not sent */
 		dev_kfree_skb(skb);
 	}
-	read_unlock(&bond->lock);
+
 	return NETDEV_TX_OK;
 }
 
@@ -4039,24 +4035,18 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 	struct bonding *bond = netdev_priv(bond_dev);
 	int res = 1;
 
-	read_lock(&bond->lock);
 	read_lock(&bond->curr_slave_lock);
 
-	if (!BOND_IS_OK(bond))
-		goto out;
+	if (bond->curr_active_slave)
+		res = bond_dev_queue_xmit(bond, skb,
+			bond->curr_active_slave->dev);
 
-	if (!bond->curr_active_slave)
-		goto out;
-
-	res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
-
-out:
 	if (res)
 		/* no suitable interface, frame not sent */
 		dev_kfree_skb(skb);
 
 	read_unlock(&bond->curr_slave_lock);
-	read_unlock(&bond->lock);
+
 	return NETDEV_TX_OK;
 }
 
@@ -4073,11 +4063,6 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
 	int i;
 	int res = 1;
 
-	read_lock(&bond->lock);
-
-	if (!BOND_IS_OK(bond))
-		goto out;
-
 	slave_no = bond->xmit_hash_policy(skb, bond->slave_cnt);
 
 	bond_for_each_slave(bond, slave, i) {
@@ -4097,12 +4082,11 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
 		}
 	}
 
-out:
 	if (res) {
 		/* no suitable interface, frame not sent */
 		dev_kfree_skb(skb);
 	}
-	read_unlock(&bond->lock);
+
 	return NETDEV_TX_OK;
 }
 
@@ -4117,11 +4101,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
 	int i;
 	int res = 1;
 
-	read_lock(&bond->lock);
-
-	if (!BOND_IS_OK(bond))
-		goto out;
-
 	read_lock(&bond->curr_slave_lock);
 	start_at = bond->curr_active_slave;
 	read_unlock(&bond->curr_slave_lock);
@@ -4160,7 +4139,6 @@ out:
 		dev_kfree_skb(skb);
 
 	/* frame sent to all suitable interfaces */
-	read_unlock(&bond->lock);
 	return NETDEV_TX_OK;
 }
 
@@ -4192,10 +4170,8 @@ static inline int bond_slave_override(struct bonding *bond,
 	struct slave *slave = NULL;
 	struct slave *check_slave;
 
-	read_lock(&bond->lock);
-
-	if (!BOND_IS_OK(bond) || !skb->queue_mapping)
-		goto out;
+	if (!skb->queue_mapping)
+		return 1;
 
 	/* Find out if any slaves have the same mapping as this skb. */
 	bond_for_each_slave(bond, check_slave, i) {
@@ -4211,8 +4187,6 @@ static inline int bond_slave_override(struct bonding *bond,
 		res = bond_dev_queue_xmit(bond, skb, slave->dev);
 	}
 
-out:
-	read_unlock(&bond->lock);
 	return res;
 }
 
@@ -4234,17 +4208,10 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb)
 	return txq;
 }
 
-static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bonding *bond = netdev_priv(dev);
 
-	/*
-	 * If we risk deadlock from transmitting this in the
-	 * netpoll path, tell netpoll to queue the frame for later tx
-	 */
-	if (is_netpoll_tx_blocked(dev))
-		return NETDEV_TX_BUSY;
-
 	if (TX_QUEUE_OVERRIDE(bond->params.mode)) {
 		if (!bond_slave_override(bond, skb))
 			return NETDEV_TX_OK;
@@ -4274,6 +4241,29 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 }
 
+static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct bonding *bond = netdev_priv(dev);
+	netdev_tx_t ret = NETDEV_TX_OK;
+
+	/*
+	 * If we risk deadlock from transmitting this in the
+	 * netpoll path, tell netpoll to queue the frame for later tx
+	 */
+	if (is_netpoll_tx_blocked(dev))
+		return NETDEV_TX_BUSY;
+
+	read_lock(&bond->lock);
+
+	if (bond->slave_cnt)
+		ret = __bond_start_xmit(skb, dev);
+	else
+		dev_kfree_skb(skb);
+
+	read_unlock(&bond->lock);
+
+	return ret;
+}
 
 /*
  * set bond mode specific net device operations
-- 
1.7.2.5


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox