Netdev List
 help / color / mirror / Atom feed
* [PATCH 2/9] drivers: OLPC: update various drivers to include olpc-ec.h
From: Andres Salomon @ 2012-07-19  1:26 UTC (permalink / raw)
  To: Andrew Morton
  Cc: devel, Daniel Drake, libertas-dev, Dan Williams, netdev,
	Jon Nettleton, x86, linux-wireless, linux-kernel,
	platform-driver-x86, Richard A. Smith, Paul Fox,
	Greg Kroah-Hartman, Anton Vorontsov, H. Peter Anvin,
	Thomas Gleixner, Chris Ball, David Woodhouse, Ingo Molnar,
	John W. Linville, Matthew Garrett
In-Reply-To: <20120718182144.2d7b0b50@dev.queued.net>

Switch over to using olpc-ec.h in multiple steps, so as not to break builds.
This covers every driver that calls olpc_ec_cmd().

Signed-off-by: Andres Salomon <dilinger@queued.net>
---
 arch/x86/include/asm/olpc.h            |    1 -
 arch/x86/platform/olpc/olpc-xo1-pm.c   |    1 +
 arch/x86/platform/olpc/olpc-xo1-sci.c  |    1 +
 arch/x86/platform/olpc/olpc-xo15-sci.c |    1 +
 arch/x86/platform/olpc/olpc.c          |    1 +
 drivers/net/wireless/libertas/if_usb.c |    1 +
 drivers/platform/x86/xo1-rfkill.c      |    3 +--
 drivers/power/olpc_battery.c           |    1 +
 drivers/staging/olpc_dcon/olpc_dcon.c  |    1 +
 9 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 513e999..5b28f3e 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -4,7 +4,6 @@
 #define _ASM_X86_OLPC_H
 
 #include <asm/geode.h>
-#include <linux/olpc-ec.h>
 
 struct olpc_platform_t {
 	int flags;
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index 0ce8616c..8054b64 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -18,6 +18,7 @@
 #include <linux/pm.h>
 #include <linux/mfd/core.h>
 #include <linux/suspend.h>
+#include <linux/olpc-ec.h>
 
 #include <asm/io.h>
 #include <asm/olpc.h>
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 04b8c73..63d4aa4 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -23,6 +23,7 @@
 #include <linux/power_supply.h>
 #include <linux/suspend.h>
 #include <linux/workqueue.h>
+#include <linux/olpc-ec.h>
 
 #include <asm/io.h>
 #include <asm/msr.h>
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 23e5b9d..9899fef 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/power_supply.h>
+#include <linux/olpc-ec.h>
 
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 796e199..a3fa180 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -22,6 +22,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/debugfs.h>
 #include <linux/mutex.h>
+#include <linux/olpc-ec.h>
 
 #include <asm/geode.h>
 #include <asm/setup.h>
diff --git a/drivers/net/wireless/libertas/if_usb.c b/drivers/net/wireless/libertas/if_usb.c
index cd3b0d4..a9828b1 100644
--- a/drivers/net/wireless/libertas/if_usb.c
+++ b/drivers/net/wireless/libertas/if_usb.c
@@ -10,6 +10,7 @@
 #include <linux/netdevice.h>
 #include <linux/slab.h>
 #include <linux/usb.h>
+#include <linux/olpc-ec.h>
 
 #ifdef CONFIG_OLPC
 #include <asm/olpc.h>
diff --git a/drivers/platform/x86/xo1-rfkill.c b/drivers/platform/x86/xo1-rfkill.c
index b57ad86..1da13ed 100644
--- a/drivers/platform/x86/xo1-rfkill.c
+++ b/drivers/platform/x86/xo1-rfkill.c
@@ -12,8 +12,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/rfkill.h>
-
-#include <asm/olpc.h>
+#include <linux/olpc-ec.h>
 
 static bool card_blocked;
 
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index 7385092..eaf917d 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -17,6 +17,7 @@
 #include <linux/power_supply.h>
 #include <linux/jiffies.h>
 #include <linux/sched.h>
+#include <linux/olpc-ec.h>
 #include <asm/olpc.h>
 
 
diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c
index 992275c..2c4bd74 100644
--- a/drivers/staging/olpc_dcon/olpc_dcon.c
+++ b/drivers/staging/olpc_dcon/olpc_dcon.c
@@ -27,6 +27,7 @@
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/reboot.h>
+#include <linux/olpc-ec.h>
 #include <asm/tsc.h>
 #include <asm/olpc.h>
 
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH 1/9] Platform: OLPC: add a stub to drivers/platform/ for the OLPC EC driver
From: Andres Salomon @ 2012-07-19  1:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: devel, Daniel Drake, libertas-dev, Dan Williams, netdev,
	Jon Nettleton, x86, linux-wireless, linux-kernel,
	platform-driver-x86, Richard A. Smith, Paul Fox,
	Greg Kroah-Hartman, Anton Vorontsov, H. Peter Anvin,
	Thomas Gleixner, Chris Ball, David Woodhouse, Ingo Molnar,
	John W. Linville, Matthew Garrett
In-Reply-To: <20120718182144.2d7b0b50@dev.queued.net>

The OLPC EC driver has outgrown arch/x86/platform/.  It's time to both
share common code amongst different architectures, as well as move it out
of arch/x86/.  The XO-1.75 is ARM-based, and the EC driver shares a lot of
code with the x86 code.

Signed-off-by: Andres Salomon <dilinger@queued.net>
---
 arch/x86/include/asm/olpc.h     |   19 +++----------------
 arch/x86/platform/olpc/olpc.c   |    4 ++--
 drivers/platform/Makefile       |    1 +
 drivers/platform/olpc/olpc-ec.c |   16 ++++++++++++++++
 include/linux/olpc-ec.h         |   29 +++++++++++++++++++++++++++++
 5 files changed, 51 insertions(+), 18 deletions(-)
 create mode 100644 drivers/platform/olpc/olpc-ec.c
 create mode 100644 include/linux/olpc-ec.h

diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 87bdbca..513e999 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -4,6 +4,7 @@
 #define _ASM_X86_OLPC_H
 
 #include <asm/geode.h>
+#include <linux/olpc-ec.h>
 
 struct olpc_platform_t {
 	int flags;
@@ -102,22 +103,8 @@ extern int pci_olpc_init(void);
 
 /* EC related functions */
 
-extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
-		unsigned char *outbuf, size_t outlen);
-
-/* EC commands */
-
-#define EC_FIRMWARE_REV			0x08
-#define EC_WRITE_SCI_MASK		0x1b
-#define EC_WAKE_UP_WLAN			0x24
-#define EC_WLAN_LEAVE_RESET		0x25
-#define EC_READ_EB_MODE			0x2a
-#define EC_SET_SCI_INHIBIT		0x32
-#define EC_SET_SCI_INHIBIT_RELEASE	0x34
-#define EC_WLAN_ENTER_RESET		0x35
-#define EC_WRITE_EXT_SCI_MASK		0x38
-#define EC_SCI_QUERY			0x84
-#define EC_EXT_SCI_QUERY		0x85
+extern int olpc_ec_cmd_x86(unsigned char cmd, unsigned char *inbuf,
+		size_t inlen, unsigned char *outbuf, size_t outlen);
 
 /* SCI source values */
 
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index a4bee53..796e199 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -125,7 +125,7 @@ static int __wait_on_obf(unsigned int line, unsigned int port, int desired)
  * <http://wiki.laptop.org/go/Ec_specification>.  Unfortunately, while
  * OpenFirmware's source is available, the EC's is not.
  */
-int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
+int olpc_ec_cmd_x86(unsigned char cmd, unsigned char *inbuf, size_t inlen,
 		unsigned char *outbuf,  size_t outlen)
 {
 	unsigned long flags;
@@ -201,7 +201,7 @@ err:
 	spin_unlock_irqrestore(&ec_lock, flags);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(olpc_ec_cmd);
+EXPORT_SYMBOL_GPL(olpc_ec_cmd_x86);
 
 void olpc_ec_wakeup_set(u16 value)
 {
diff --git a/drivers/platform/Makefile b/drivers/platform/Makefile
index 782953a..b17c16c 100644
--- a/drivers/platform/Makefile
+++ b/drivers/platform/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_X86)		+= x86/
+obj-$(CONFIG_OLPC)		+= olpc/
diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c
new file mode 100644
index 0000000..4202603
--- /dev/null
+++ b/drivers/platform/olpc/olpc-ec.c
@@ -0,0 +1,16 @@
+/*
+ * Generic driver for the OLPC Embedded Controller.
+ *
+ * Copyright (C) 2011-2012 One Laptop per Child Foundation.
+ *
+ * Licensed under the GPL v2 or later.
+ */
+#include <linux/module.h>
+#include <asm/olpc.h>
+
+int olpc_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf, size_t outlen)
+{
+	/* Currently a stub; this will be expanded upon later. */
+	return olpc_ec_cmd_x86(cmd, inbuf, inlen, outbuf, outlen);
+}
+EXPORT_SYMBOL_GPL(olpc_ec_cmd);
diff --git a/include/linux/olpc-ec.h b/include/linux/olpc-ec.h
new file mode 100644
index 0000000..6d4e426
--- /dev/null
+++ b/include/linux/olpc-ec.h
@@ -0,0 +1,29 @@
+#ifndef _LINUX_OLPC_EC_H
+#define _LINUX_OLPC_EC_H
+
+/* XO-1 EC commands */
+#define EC_FIRMWARE_REV			0x08
+#define EC_WRITE_SCI_MASK		0x1b
+#define EC_WAKE_UP_WLAN			0x24
+#define EC_WLAN_LEAVE_RESET		0x25
+#define EC_READ_EB_MODE			0x2a
+#define EC_SET_SCI_INHIBIT		0x32
+#define EC_SET_SCI_INHIBIT_RELEASE	0x34
+#define EC_WLAN_ENTER_RESET		0x35
+#define EC_WRITE_EXT_SCI_MASK		0x38
+#define EC_SCI_QUERY			0x84
+#define EC_EXT_SCI_QUERY		0x85
+
+#ifdef CONFIG_OLPC
+
+extern int olpc_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf,
+		size_t outlen);
+
+#else
+
+static inline int olpc_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf,
+		size_t outlen) { return -ENODEV; }
+
+#endif /* CONFIG_OLPC */
+
+#endif /* _LINUX_OLPC_EC_H */
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH 0/9] OLPC: create a generic OLPC EC driver
From: Andres Salomon @ 2012-07-19  1:21 UTC (permalink / raw)
  To: Andrew Morton
  Cc: devel, Daniel Drake, libertas-dev, Dan Williams, netdev,
	Jon Nettleton, x86, linux-wireless, linux-kernel,
	platform-driver-x86, Richard A. Smith, Paul Fox,
	Greg Kroah-Hartman, Anton Vorontsov, H. Peter Anvin,
	Thomas Gleixner, Chris Ball, David Woodhouse, Ingo Molnar,
	John W. Linville, Matthew Garrett

The OLPC EC (Embedded Controller) code that is currently upstream is
x86-only, originally written for the XO-1.  Since then, we've had the
XO-1.5 (also x86), and XO-1.75 (arm-based) enter mass production.  The
1.75 uses a vastly different EC protocol, and future hardware revisions
are likely to change it even further.

However, the drivers do share quite a bit of code, so it makes sense to
have a platform-agnostic driver that calls into platform-specific hooks
for each XO's EC driver.  This is the first stab and creating such a
beast (with further patches pending).  Aside from the lack of code
duplication, this is helpful for fixing bugs in one place (for example,
we fixed an EC suspend/resume bug in 1.75 that I've just seen happen on
1.5 without these patches.  With these patches, the problem goes away).

These patches are against Linus's current HEAD; let me know if they
don't apply somewhere, and I'll happily redo them against the -next
tree.  I'm assuming that these changes (which touch places like x86,
wireless, and staging) should go through either the x86 tree, or
through akpm's tree.

Alternatively, if the reviews are positive and I can get SOBs from the
relevant maintainers, I can set up a platform-olpc tree somewhere and
request a pull from Linus.

^ permalink raw reply

* [PATCH] Crash in tun
From: Mikulas Patocka @ 2012-07-19  1:12 UTC (permalink / raw)
  To: Maxim Krasnyansky, vtun; +Cc: netdev, davem

Hi

I want to report a crash when using the tun driver. The crash can be 
reproduced by starting and stopping miredo in the loop:

while ! dmesg | grep -q BUG; do
	/etc/init.d/miredo start;/etc/init.d/miredo stop
done

The crash happens in iput_final because inode->i_sb->s_op is NULL.

The crash happens in 3.4, 3.4.5, 3.5-rc7. The crash does not happen in 
3.3. When I attempted to bisect, unrelated changes in completely different 
driver regarding memory allocation triggered the crash --- so it is 
likely buggy even in 3.3 and before, it just didn't show up.


What is obviously wrong:
in the tun driver "struct socket" is embedded in "struct tun_struct".

The backtrace goes through:
netdev_run_todo -> tun_free_netdev -> sk_release_kernel -> sock_release.

sock_release calls iput(SOCK_INODE(sock)). SOCK_INODE assumes that struct 
socket is embedded in "struct socket_alloc" (which is not true, it is 
embedded in "struct tun_struct"), gets a pointer to non-existing inode --- 
and there goes the crash.


The crash can be fixed by writing any non-NULL value to tun->socket.file 
to prevent sock_release from calling iput(SOCK_INODE(sock)). Or maybe you 
come up with a better fix.


Note another bug - when you are repeatedly starting and stopping miredo, 
even if it doesn't crash, the value "sockets: used" in 
/proc/*/net/sockstat keeps on decreasing. That's because sock_release 
decrements sockets_in_use, but there was no sock_alloc to increment it.


Mikulas

---

Signed-off-by: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>

---
 drivers/net/tun.c |    2 ++
 net/socket.c      |    4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

Index: linux-3.4.5-fast/drivers/net/tun.c
===================================================================
--- linux-3.4.5-fast.orig/drivers/net/tun.c	2012-07-19 02:42:56.000000000 +0200
+++ linux-3.4.5-fast/drivers/net/tun.c	2012-07-19 02:50:13.000000000 +0200
@@ -358,6 +358,8 @@ static void tun_free_netdev(struct net_d
 {
 	struct tun_struct *tun = netdev_priv(dev);
 
+	/* Prevent the code in sock_release from calling iput. */
+	tun->socket.file = (void *)1;
 	sk_release_kernel(tun->socket.sk);
 }
 
Index: linux-3.4.5-fast/net/socket.c
===================================================================
--- linux-3.4.5-fast.orig/net/socket.c	2012-07-19 03:00:30.000000000 +0200
+++ linux-3.4.5-fast/net/socket.c	2012-07-19 03:05:36.000000000 +0200
@@ -522,7 +522,9 @@ void sock_release(struct socket *sock)
 	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
 		printk(KERN_ERR "sock_release: fasync list not empty!\n");
 
-	percpu_sub(sockets_in_use, 1);
+	/* a hack - sockets_in_use should not be decremented when tun calls this */
+	if (sock->file != (void *)1)
+		percpu_sub(sockets_in_use, 1);
 	if (!sock->file) {
 		iput(SOCK_INODE(sock));
 		return;
 
--- 

BUG: unable to handle kernel NULL pointer dereference at 0000000000000020
IP: [<ffffffff81113246>] iput+0x76/0x230
PGD 43e5e9067 PUD 4468d5067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
CPU 1
Modules linked in: ip6table_filter ip6_tables iptable_filter ip_tables 
ebtable_nat ebtables x_tables kvm_amd kvm tun cpufreq_userspace 
cpufreq_stats cpufreq_powersave cpufreq_ondemand cpufreq_conservative ipv6 
fuse raid0 md_mod lm85 hwmon_vid snd_usb_audio snd_pcm_oss snd_mixer_oss 
snd_pcm snd_timer snd_page_alloc snd_hwdep snd_usbmidi_lib snd_rawmidi snd 
soundcore ide_cd_mod cdrom ohci_hcd sata_svw libata serverworks ide_core 
powernow_k8 ehci_hcd usbcore tg3 usb_common floppy freq_table e100 skge 
mii i2c_piix4 libphy mperf k10temp rtc_cmos processor button hwmon 
microcode unix

Pid: 10826, comm: miredo Not tainted 3.4.5 #85 empty empty/S3992-E
RIP: 0010:[<ffffffff81113246>]  [<ffffffff81113246>] iput+0x76/0x230
RSP: 0018:ffff88043dc73e48  EFLAGS: 00010246
RAX: 0000000000000001 RBX: ffff88023eba3f20 RCX: ffff880447d80000
RDX: 0000000000000001 RSI: ffff88023eba3f98 RDI: ffff88023eba3f98
RBP: ffff88023eba3f98 R08: ffffffff814804d8 R09: 0000000000000000
R10: 0000000000000096 R11: dead000000100100 R12: ffff88023eba3f48
R13: 0000000000000000 R14: ffff88043dc73e88 R15: 00000000ffffad3a
FS:  00007fe71bbb9700(0000) GS:ffff880247c80000(0000) 
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000020 CR3: 000000043dcf0000 CR4: 00000000000007f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process miredo (pid: 10826, threadinfo ffff88043dc72000, task 
ffff88043dc1b8d0) Stack:
 ffff88043dc73e88 ffff88023efef000 ffff88023eba3800 ffff88023eba3c00
 ffff88024749ecc0 ffffffff81281d93 ffff88023e8fa500 ffffffff81295438
 ffff88043dc73e88 ffff88043dc73e88 ffff88023eba38f8 ffff88023e8fa500
Call Trace:
 [<ffffffff81281d93>] ? sk_release_kernel+0x23/0x40
 [<ffffffff81295438>] ? netdev_run_todo+0x1a8/0x260
 [<ffffffffa02522a3>] ? tun_chr_close+0x93/0xb0 [tun]
 [<ffffffff810fc3ed>] ? fput+0xdd/0x260
 [<ffffffff810f8aff>] ? filp_close+0x5f/0x90
 [<ffffffff810f8bc7>] ? sys_close+0x97/0x100
 [<ffffffff81311f22>] ? system_call_fastpath+0x16/0x1b
Code: 6c 24 10 4c 8b 64 24 18 4c 8b 6c 24 20 48 83 c4 28 c3 0f 1f 00 f6 83 
90 00 00 00 08 4c 8b 63 28 4d 8b 6c 24 30 0f 85 7d 01 00 00 <49> 8b 45 20 
48 85 c0 0f 84 9d 00 00 00 48 89 df ff d0 85 c0 0f
RIP  [<ffffffff81113246>] iput+0x76/0x230
 RSP <ffff88043dc73e48>
CR2: 0000000000000020
---[ end trace 6bd5160ffd3ba7a2 ]---
note: miredo[10826] exited with preempt_count 1

^ permalink raw reply

* Re: linux-next: manual merge of the net-next tree with Linus' tree
From: Jeff Kirsher @ 2012-07-19  1:18 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: David Miller, netdev, linux-next, linux-kernel, Narendra K,
	Alexander Duyck
In-Reply-To: <20120719111510.478d62e4f0778bd0de1f4c66@canb.auug.org.au>

[-- Attachment #1: Type: text/plain, Size: 508 bytes --]

On Thu, 2012-07-19 at 11:15 +1000, Stephen Rothwell wrote:
> Hi all,
> 
> Today's linux-next merge of the net-next tree got a conflict in
> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c between commit
> 936597631dd3 ("ixgbevf: Prevent RX/TX statistics getting reset to zero")
> from Linus' tree and various commits from the net-next tree.
> 
> I just used the version from the net tree.

I will take a look at it later tonight to verify the conflict resolution
is correct.

Thanks Stephen.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* linux-next: manual merge of the net-next tree with Linus' tree
From: Stephen Rothwell @ 2012-07-19  1:15 UTC (permalink / raw)
  To: David Miller, netdev
  Cc: linux-next, linux-kernel, Narendra K, Jeff Kirsher,
	Alexander Duyck

[-- Attachment #1: Type: text/plain, Size: 396 bytes --]

Hi all,

Today's linux-next merge of the net-next tree got a conflict in
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c between commit
936597631dd3 ("ixgbevf: Prevent RX/TX statistics getting reset to zero")
from Linus' tree and various commits from the net-next tree.

I just used the version from the net tree.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH v4] net: cgroup: fix access the unallocated memory in netprio cgroup
From: Li Zefan @ 2012-07-19  1:00 UTC (permalink / raw)
  To: Neil Horman
  Cc: John Fastabend, Gao feng, eric.dumazet, linux-kernel, netdev,
	davem, Eric Dumazet, Rustad, Mark D
In-Reply-To: <20120718142632.GF25563@hmsreliant.think-freely.org>

>>>>>  static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)

>>>>>  {
>>>>>  	struct cgroup_netprio_state *cs;
>>>>> -	int ret;
>>>>> +	int ret = -EINVAL;
>>>>>
>>>>>  	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
>>>>>  	if (!cs)
>>>>>  		return ERR_PTR(-ENOMEM);
>>>>>
>>>>> -	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
>>>>> -		kfree(cs);
>>>>> -		return ERR_PTR(-EINVAL);
>>>>> -	}
>>>>> +	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
>>>>> +		goto out;
>>>>>
>>>>>  	ret = get_prioidx(&cs->prioidx);
>>>>> -	if (ret != 0) {
>>>>> +	if (ret < 0) {
>>>>>  		pr_warn("No space in priority index array\n");
>>>>> -		kfree(cs);
>>>>> -		return ERR_PTR(ret);
>>>>> +		goto out;
>>>>> +	}
>>>>> +
>>>>> +	ret = update_netdev_tables();
>>>>> +	if (ret < 0) {
>>>>> +		put_prioidx(cs->prioidx);
>>>>> +		goto out;
>>>>>  	}
>>>>
>>>> Gao,
>>>>
>>>> This introduces a null ptr dereference when netprio_cgroup is built
>>>> into the kernel because update_netdev_tables() depends on init_net.
>>>> However cgrp_create is being called by cgroup_init before
>>>> do_initcalls() is called and before net_dev_init().
>>>>
>>>> .John
>>>>
>>> Not sure I follow here John.  Shouldn't init_net be initialized prior to any
>>> network devices getting registered?  In other words, shouldn't for_each_netdev
>>> just result in zero iterations through the loop?
>>> Neil
>>>
>>
>> init_net _is_ initialized prior to any network devices getting
>> registered but not before cgrp_create called via cgroup_init.
>>
>> #define for_each_netdev(net, d)         \
>>                 list_for_each_entry(d, &(net)->dev_base_head, dev_list)
>>
>> but dev_base_head is zeroed at this time. In netdev_init we have,
>>
>>         INIT_LIST_HEAD(&net->dev_base_head);
>>
>> but we haven't got that far yet because cgroup_init is called
>> before do_initcalls().
>>
> ok, I see that, and it makes sense, but at this point I'm more concerned with
> cgroups getting initalized twice.  The early_init flag is clear in the
> cgroup_subsystem for netprio, so we really shouldn't be getting initalized from
> cgroup_init. We should be getting initalized from the module_init() call that

> we register

If the early_init flag is set, a cgroup subsys will be initialized from
cgroup_early_init(), otherwise cgroup_init().

If netprio is built as a module, the subsys will be initailized from module_init(),
otherwise cgroup_init() (in this case cgroup_load_subsys() called in module_init()
is a no-op).

So it won't get initialized twice.

^ permalink raw reply

* Re: [PATCH firmware] rtl_nic: update firmware for RTL8168G
From: Ben Hutchings @ 2012-07-19  0:59 UTC (permalink / raw)
  To: Hayes Wang; +Cc: dwmw2, romieu, netdev, linux-kernel
In-Reply-To: <1342008603-1279-1-git-send-email-hayeswang@realtek.com>

[-- Attachment #1: Type: text/plain, Size: 565 bytes --]

On Wed, 2012-07-11 at 20:10 +0800, Hayes Wang wrote:
> File: rtl_nic/rtl8168g-1.fw
> Version: 0.0.2
> 
> Change the ocp_base of linux driver to OCP_STD_PHY_BASE after setting
> firmware. The firmware would modify the ocp_base, and that results the
> driver uses the wrong ocp_base to access standard phy after setting
> firmware.
> 
> Signed-off-by: Hayes Wang <hayeswang@realtek.com>
[...]

Applied, thanks.

Ben.

-- 
Ben Hutchings
DNRC Motto:  I can please only one person per day.
Today is not your day.  Tomorrow isn't looking good either.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply

* Re: [PATCH] SUNRPC: Prevent kernel stack corruption on long values of flush
From: Jim Rees @ 2012-07-18 23:04 UTC (permalink / raw)
  To: Sasha Levin
  Cc: J. Bruce Fields, Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, davej-H+wXaHxf7aLQT0dZR+AlfA,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <50072DEE.2000205-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Sasha Levin wrote:

  > Learning from what happened in this specific case, there are actually 2 issues here:
  > 
  >  - Array size was constant and too small, which is solved by the patch above.
  >  - We were blindly trying to sprintf() into that array, this issue may pop back up if someone decides to change the format string forgetting to modify the array declaration.
  > 

The original patch changed the sprintf to snprintf, and that still seems
like a good idea.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCHv1] net: stmmac: Add ip version to dts bindings
From: dinguyen @ 2012-07-18 23:28 UTC (permalink / raw)
  To: netdev
  Cc: dinh.linux, peppe.cavallaro, sr, shiraz.hashim, deepak.sikri,
	pavel, arnd, Dinh Nguyen

From: Dinh Nguyen <dinguyen@altera.com>

Because there are multiple variants to the stmmac/dwmac driver, the
dts bindings should be updated to include version of the IP used.

Signed-off-by: Dinh Nguyen <dinguyen@altera.com>
---
 Documentation/devicetree/bindings/net/stmmac.txt   |    3 ++-
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c  |    8 ++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt
index 1f62623..060bbf0 100644
--- a/Documentation/devicetree/bindings/net/stmmac.txt
+++ b/Documentation/devicetree/bindings/net/stmmac.txt
@@ -1,7 +1,8 @@
 * STMicroelectronics 10/100/1000 Ethernet driver (GMAC)
 
 Required properties:
-- compatible: Should be "st,spear600-gmac"
+- compatible: Should be "snps,dwmac-<ip_version>" "snps,dwmac"
+	For backwards compatibility: "st,spear600-gmac" is also supported.
 - reg: Address and length of the register set for the device
 - interrupt-parent: Should be the phandle for the interrupt controller
   that services interrupts for this device
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 680d2b8..87cadcf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -49,7 +49,9 @@ static int __devinit stmmac_probe_config_dt(struct platform_device *pdev,
 	 * are provided. All other properties should be added
 	 * once needed on other platforms.
 	 */
-	if (of_device_is_compatible(np, "st,spear600-gmac")) {
+	if (of_device_is_compatible(np, "st,spear600-gmac") ||
+		of_device_is_compatible(np, "snps,dwmac-3.70a") ||
+		of_device_is_compatible(np, "snps,dwmac")) {
 		plat->has_gmac = 1;
 		plat->pmt = 1;
 	}
@@ -250,7 +252,9 @@ static const struct dev_pm_ops stmmac_pltfr_pm_ops;
 #endif /* CONFIG_PM */
 
 static const struct of_device_id stmmac_dt_ids[] = {
-	{ .compatible = "st,spear600-gmac", },
+	{ .compatible = "st,spear600-gmac"},
+	{ .compatible = "snps,dwmac-3.70a"},
+	{ .compatible = "snps,dwmac"},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, stmmac_dt_ids);
-- 
1.7.9.5

^ permalink raw reply related

* Re: [RFC] r8169 : why SG / TX checksum are default disabled
From: David Miller @ 2012-07-18 22:24 UTC (permalink / raw)
  To: eric.dumazet; +Cc: romieu, hayeswang, netdev
In-Reply-To: <1342649136.2626.3757.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 19 Jul 2012 00:05:36 +0200

> On Wed, 2012-07-18 at 23:44 +0200, Francois Romieu wrote:
>> David Miller <davem@davemloft.net> :
>> [...]
>> > A NETDEV_TX_OK return means we accepted the SKB, it doesn't look like
>> > that's what you are doing in the skb_padto() failure path.
>> 
>> ?
>> 
>> - skb_padto fails
>>   (original skb is implicitely freed)
>> - skb_padto returns error status (!= 0)
>> - rtl8169_tso_csum returns false
>> - start_xmit returns NETDEV_TX_OK. 
>> 
>> I'll search the missing "!" after some sleep if that's what you are talking
>> about. Otherwise than that, I don't get it.
>> 
> 
> 
> Yes, I believe your patch is fine.
> 
> In fact many drivers dont account the error in their stats.

My bad, I forgot that skb_padto() frees the SKB on failure.

^ permalink raw reply

* [RFC PATCH] net: Add support for virtual machine device queues (VMDQ)
From: John Fastabend @ 2012-07-18 22:05 UTC (permalink / raw)
  To: or.gerlitz, davem; +Cc: roland, netdev, ali, sean.hefty, shlomop

This adds support to allow virtual net devices to be created. These
devices can be managed independtly of the physical function but
use the same physical link.

This is analagous to an offloaded macvlan device. The primary
advantage to VMDQ net devices over virtual functions is they can
be added and removed dynamically as needed.

Sending this for Or Gerlitz to take a peak at and see if this
could be used for his ipoib bits. Its not pretty as is and
likely needs some work its just an idea at this point use at
your own risk I believe it compiles.
---

 drivers/net/Kconfig       |    7 ++
 drivers/net/Makefile      |    1 
 drivers/net/vmdq.c        |  130 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/netdevice.h |    6 ++
 include/net/rtnetlink.h   |    2 +
 net/core/rtnetlink.c      |   10 +++
 6 files changed, 155 insertions(+), 1 deletions(-)
 create mode 100644 drivers/net/vmdq.c

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0c2bd80..f28d951 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -337,6 +337,13 @@ config VMXNET3
 	  To compile this driver as a module, choose M here: the
 	  module will be called vmxnet3.
 
+config VMDQ 
+	tristate "Support Embedded bridge devices and child devices"
+	help
+	  This supports chipsets with embedded switching components and
+	  allows us to create more net_devices that are logically slaves
+	  of a master net device.
+
 source "drivers/net/hyperv/Kconfig"
 
 endif # NETDEVICES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 3d375ca..1eb5605 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_NET_TEAM) += team/
 obj-$(CONFIG_TUN) += tun.o
 obj-$(CONFIG_VETH) += veth.o
 obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
+obj-$(CONFIG_VMDQ) += vmdq.o
 
 #
 # Networking Drivers
diff --git a/drivers/net/vmdq.c b/drivers/net/vmdq.c
new file mode 100644
index 0000000..9acc429
--- /dev/null
+++ b/drivers/net/vmdq.c
@@ -0,0 +1,130 @@
+/*******************************************************************************
+
+  vmdq - Support virtual machine device queues (VMDQ)
+  Copyright(c) 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  John Fastabend <john.r.fastabend@intel.com>
+
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <net/rtnetlink.h>
+#include <linux/etherdevice.h>
+
+static int vmdq_newlink(struct net *src_net, struct net_device *dev,
+		       struct nlattr *tb[], struct nlattr *data[])
+{
+	struct net_device *lowerdev;
+	int err = -EOPNOTSUPP;
+
+	if (!tb[IFLA_LINK])
+		return -EINVAL;
+
+	lowerdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
+	if (!lowerdev)
+		return -ENODEV;
+
+	if (!tb[IFLA_MTU])
+		dev->mtu = lowerdev->mtu;
+	else if (dev->mtu > lowerdev->mtu)
+		return -EINVAL;
+
+	if (lowerdev->netdev_ops->ndo_add_vmdq)
+		err = lowerdev->netdev_ops->ndo_add_vmdq(lowerdev, dev);
+
+	if (err < 0)
+		return err;
+
+	err = register_netdevice(dev);
+	if (err < 0)
+		lowerdev->netdev_ops->ndo_del_vmdq(lowerdev, dev);
+	else
+		netif_stacked_transfer_operstate(lowerdev, dev);
+
+	return err;
+}
+
+void vmdq_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct net_device *lowerdev = __dev_get_by_index(dev_net(dev), dev->iflink);
+
+	if (lowerdev && lowerdev->netdev_ops->ndo_del_vmdq)
+		lowerdev->netdev_ops->ndo_del_vmdq(lowerdev, dev);		
+}
+
+static void vmdq_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+}
+
+size_t vmdq_getpriv_size(struct net *src_net, struct nlattr *tb[])
+{
+	struct net_device *lowerdev;
+
+	if (!tb[IFLA_LINK])
+		return -EINVAL;
+
+	lowerdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
+	if (!lowerdev)
+		return -ENODEV;
+
+	return sizeof(netdev_priv(lowerdev));
+}
+
+int vmdq_get_tx_queues(struct net *net, struct nlattr *tb[])
+{
+	struct net_device *lowerdev;
+
+	if (!tb[IFLA_LINK])
+		return -EINVAL;
+
+	lowerdev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
+	if (!lowerdev)
+		return -ENODEV;
+
+	return lowerdev->num_tx_queues;
+}
+
+static struct rtnl_link_ops vmdq_link_ops __read_mostly = {
+	.kind		= "vmdq",
+	.setup		= vmdq_setup,
+	.newlink	= vmdq_newlink,
+	.dellink	= vmdq_dellink,
+	.get_priv_size	= vmdq_getpriv_size,
+	.get_tx_queues	= vmdq_get_tx_queues,
+};
+
+static int __init vmdq_init_module(void)
+{
+	return rtnl_link_register(&vmdq_link_ops);
+}
+
+static void __exit vmdq_cleanup_module(void)
+{
+	rtnl_link_unregister(&vmdq_link_ops);
+}
+
+module_init(vmdq_init_module);
+module_exit(vmdq_cleanup_module);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("John Fastabend <john.r.fastabend@intel.com>");
+MODULE_DESCRIPTION("Driver for embedded switch chipsets");
+MODULE_ALIAS_RTNL_LINK("vmdq");
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ab0251d..d879c4d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -972,6 +972,12 @@ struct net_device_ops {
 						   struct nlattr *port[]);
 	int			(*ndo_get_vf_port)(struct net_device *dev,
 						   int vf, struct sk_buff *skb);
+
+	int			(*ndo_add_vmdq)(struct net_device *lowerdev,
+						struct net_device *dev);
+	int			(*ndo_del_vmdq)(struct net_device *lowerdev,
+						struct net_device *dev);
+
 	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
 #if IS_ENABLED(CONFIG_FCOE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index bbcfd09..e9f903c 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -79,6 +79,8 @@ struct rtnl_link_ops {
 					       const struct net_device *dev);
 	int			(*get_tx_queues)(struct net *net,
 						 struct nlattr *tb[]);
+	size_t			(*get_priv_size)(struct net *net,
+						 struct nlattr *tb[]);
 };
 
 extern int	__rtnl_link_register(struct rtnl_link_ops *ops);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2b325c3..2e33b9a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1627,6 +1627,7 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
 	int err;
 	struct net_device *dev;
 	unsigned int num_queues = 1;
+	size_t priv_size = ops->priv_size;
 
 	if (ops->get_tx_queues) {
 		err = ops->get_tx_queues(src_net, tb);
@@ -1635,8 +1636,15 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
 		num_queues = err;
 	}
 
+	if (ops->get_priv_size) {
+		err = ops->get_priv_size(src_net, tb);
+		if (err < 0)
+			goto err;
+		priv_size = err;
+	}
+
 	err = -ENOMEM;
-	dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues);
+	dev = alloc_netdev_mq(priv_size, ifname, ops->setup, num_queues);
 	if (!dev)
 		goto err;
 

^ permalink raw reply related

* Re: [PATCH net-next V1 1/9] IB/ipoib: Add support for clones / multiple childs on the same partition
From: John Fastabend @ 2012-07-18 22:11 UTC (permalink / raw)
  To: David Miller, or.gerlitz; +Cc: roland, netdev, ali, sean.hefty, shlomop, erezsh
In-Reply-To: <20120718.143608.2101579052587289420.davem@davemloft.net>

On 7/18/2012 2:36 PM, David Miller wrote:
> From: Or Gerlitz <or.gerlitz@gmail.com>
> Date: Thu, 19 Jul 2012 00:24:58 +0300
>
>> On Wed, Jul 18, 2012 at 9:38 PM, David Miller <davem@davemloft.net> wrote:
>>> From: Or Gerlitz <ogerlitz@mellanox.com>
>>
>>>> All sorts of childs are still created/deleted through sysfs, in a
>>>> similar manner to the way legacy child interfaces are.
>>
>>> Network device instantiation of this type is the domain of
>>> rtnl_link_ops rather than ugly sysfs interfaces.
>>
>> Didn't add any **new** sysfs interfaces in this patch. The IPoIB sysfs
>> entries to create child devices are there from IPoIB's day one, and
>> we're only extending them a tiny bit.
>
> That's extremely unfortunate, having private ways of instantiating
> networking devices leads to an extremely poor user experience.
>
> Would you like to have to train every single user in the case
> where each and every driver author makes his own unique way
> of configuring his hardware?
> --

Or,

I've got a rough patch to use rtnl_link_ops to add what we've been
calling 'virtual machine device queues' or VMDq. This looks a lot
like macvlan with offloaded switching and I believe similar to your
child case above.

Also what is a "pkey"

I'll post it as a use at your own risk shortly although this week I'm
short on time so maybe next week I can get something more "real" out.
Been stealing cycles between other work things today.

If you want to do complete it more power to you.

.John

^ permalink raw reply

* Re: [RFC] r8169 : why SG / TX checksum are default disabled
From: Eric Dumazet @ 2012-07-18 22:05 UTC (permalink / raw)
  To: Francois Romieu; +Cc: David Miller, hayeswang, netdev
In-Reply-To: <20120718214422.GA18207@electric-eye.fr.zoreil.com>

On Wed, 2012-07-18 at 23:44 +0200, Francois Romieu wrote:
> David Miller <davem@davemloft.net> :
> [...]
> > A NETDEV_TX_OK return means we accepted the SKB, it doesn't look like
> > that's what you are doing in the skb_padto() failure path.
> 
> ?
> 
> - skb_padto fails
>   (original skb is implicitely freed)
> - skb_padto returns error status (!= 0)
> - rtl8169_tso_csum returns false
> - start_xmit returns NETDEV_TX_OK. 
> 
> I'll search the missing "!" after some sleep if that's what you are talking
> about. Otherwise than that, I don't get it.
> 


Yes, I believe your patch is fine.

In fact many drivers dont account the error in their stats.

Thanks

^ permalink raw reply

* Re: [RFC] r8169 : why SG / TX checksum are default disabled
From: Francois Romieu @ 2012-07-18 21:44 UTC (permalink / raw)
  To: David Miller; +Cc: hayeswang, eric.dumazet, netdev
In-Reply-To: <20120718.132840.1571938255177607234.davem@davemloft.net>

David Miller <davem@davemloft.net> :
[...]
> A NETDEV_TX_OK return means we accepted the SKB, it doesn't look like
> that's what you are doing in the skb_padto() failure path.

?

- skb_padto fails
  (original skb is implicitely freed)
- skb_padto returns error status (!= 0)
- rtl8169_tso_csum returns false
- start_xmit returns NETDEV_TX_OK. 

I'll search the missing "!" after some sleep if that's what you are talking
about. Otherwise than that, I don't get it.

-- 
Ueimor

^ permalink raw reply

* Re: [PATCH] net: Statically initialize init_net.dev_base_head
From: John Fastabend @ 2012-07-18 21:56 UTC (permalink / raw)
  To: David Miller; +Cc: nhorman, mark.d.rustad, netdev, gaofeng, eric.dumazet
In-Reply-To: <20120718.133243.1384365436939730847.davem@davemloft.net>

On 7/18/2012 1:32 PM, David Miller wrote:
> From: John Fastabend <john.r.fastabend@intel.com>
> Date: Wed, 18 Jul 2012 13:31:13 -0700
>
>> On 7/18/2012 1:21 PM, Neil Horman wrote:
>>> On Wed, Jul 18, 2012 at 01:20:10PM -0700, David Miller wrote:
>>>> From: Neil Horman <nhorman@tuxdriver.com>
>>>> Date: Wed, 18 Jul 2012 16:11:49 -0400
>>>>
>>>>> On Wed, Jul 18, 2012 at 12:06:07PM -0700, Mark Rustad wrote:
>>>>>> This change eliminates an initialization-order hazard most
>>>>>> recently seen when netprio_cgroup is built into the kernel.
>>>>>>
>>>>>> With thanks to Eric Dumazet for catching a bug.
>>>>>>
>>>>>> Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
>>>>    ...
>>>>> I think dave was going to take John Fastabends patch from earlier
>>>>> today, but
>>>>> this works just as well.  Long term I'm going to look into delaying
>>>>> initzlization for cgroups, as it creates a strange initialization
>>>>> state when you
>>>>> have a module_init routine registered.
>>>>
>>>> Neil, any particular preference between John's and Mark's version
>>>> of the fix?
>>>>
>>> I think they're both perfectly good.  If I had to choose I'd say
>>> Marks, just
>>> because its done by initializing data, rather than adding more code to
>>> run every
>>> time we create a cgroup.
>>>
>>> Neil
>>>
>>
>> Fine by me if we take this version instead.
>
> I think that's what I'll do, sorry for all the trouble John :)
>

No problem. For what its worth verified this resolved the netprio
issue on my system as expected.

^ permalink raw reply

* Re: [PATCH v2 2/7] net-tcp: Fast Open client - cookie cache
From: Eric Dumazet @ 2012-07-18 21:54 UTC (permalink / raw)
  To: Yuchung Cheng; +Cc: davem, hkchu, edumazet, ncardwell, sivasankar, netdev
In-Reply-To: <1342646166.2626.3692.camel@edumazet-glaptop>

On Wed, 2012-07-18 at 23:16 +0200, Eric Dumazet wrote:

> Hmm, this rcu_read_lock() in cache_set() gives a false sense of
> security ;)
> 
> I suggest using a seqlock instead ?
> 

Please find an updated version for your next submission :

 include/net/tcp.h      |    4 +++
 net/ipv4/tcp_metrics.c |   51 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5aed371..e601da1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -405,6 +405,10 @@ extern void tcp_metrics_init(void);
 extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check);
 extern bool tcp_remember_stamp(struct sock *sk);
 extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
+extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
+				   struct tcp_fastopen_cookie *cookie);
+extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
+				   struct tcp_fastopen_cookie *cookie);
 extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
 extern void tcp_disable_fack(struct tcp_sock *tp);
 extern void tcp_close(struct sock *sk, long timeout);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 1a115b6..d02ff37 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -30,6 +30,11 @@ enum tcp_metric_index {
 	TCP_METRIC_MAX,
 };
 
+struct tcp_fastopen_metrics {
+	u16	mss;
+	struct	tcp_fastopen_cookie	cookie;
+};
+
 struct tcp_metrics_block {
 	struct tcp_metrics_block __rcu	*tcpm_next;
 	struct inetpeer_addr		tcpm_addr;
@@ -38,6 +43,7 @@ struct tcp_metrics_block {
 	u32				tcpm_ts_stamp;
 	u32				tcpm_lock;
 	u32				tcpm_vals[TCP_METRIC_MAX];
+	struct tcp_fastopen_metrics	tcpm_fastopen;
 };
 
 static bool tcp_metric_locked(struct tcp_metrics_block *tm,
@@ -118,6 +124,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
 	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
 	tm->tcpm_ts = 0;
 	tm->tcpm_ts_stamp = 0;
+	tm->tcpm_fastopen.mss = 0;
+	tm->tcpm_fastopen.cookie.len = 0;
 }
 
 static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
@@ -633,6 +641,49 @@ bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
 	return ret;
 }
 
+static DEFINE_SEQLOCK(fastopen_seqlock);
+
+void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
+			    struct tcp_fastopen_cookie *cookie)
+{
+	struct tcp_metrics_block *tm;
+
+	rcu_read_lock();
+	tm = tcp_get_metrics(sk, __sk_dst_get(sk), false);
+	if (tm) {
+		struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;
+		unsigned int seq;
+
+		do {
+			seq = read_seqbegin(&fastopen_seqlock);
+			if (tfom->mss)
+				*mss = tfom->mss;
+			*cookie = tfom->cookie;
+		} while (read_seqretry(&fastopen_seqlock, seq));
+	}
+	rcu_read_unlock();
+}
+
+
+void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
+			    struct tcp_fastopen_cookie *cookie)
+{
+	struct tcp_metrics_block *tm;
+
+	rcu_read_lock();
+	tm = tcp_get_metrics(sk, __sk_dst_get(sk), true);
+	if (tm) {
+		struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;
+
+		write_seqlock_bh(&fastopen_seqlock);
+		tfom->mss = mss;
+		if (cookie->len > 0)
+			tfom->cookie = *cookie;
+		write_sequnlock_bh(&fastopen_seqlock);
+	}
+	rcu_read_unlock();
+}
+
 static unsigned long tcpmhash_entries;
 static int __init set_tcpmhash_entries(char *str)
 {

^ permalink raw reply related

* Re: [PATCH] SUNRPC: Prevent kernel stack corruption on long values of flush
From: Sasha Levin @ 2012-07-18 21:43 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: Jim Rees, Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, davej-H+wXaHxf7aLQT0dZR+AlfA,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <50072BA7.6070205-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

On 07/18/2012 11:33 PM, Sasha Levin wrote:
> On 07/18/2012 11:08 PM, J. Bruce Fields wrote:
>> On Wed, Jul 18, 2012 at 04:00:49PM -0400, Jim Rees wrote:
>>> J. Bruce Fields wrote:
>>>
>>>   On Tue, Jul 17, 2012 at 12:01:26AM +0200, Sasha Levin wrote:
>>>   > The buffer size in read_flush() is too small for the longest possible values
>>>   > for it. This can lead to a kernel stack corruption:
>>>   
>>>   Thanks!
>>>   
>>>   > 
>>>   > diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
>>>   > index 2afd2a8..f86d95e 100644
>>>   > --- a/net/sunrpc/cache.c
>>>   > +++ b/net/sunrpc/cache.c
>>>   > @@ -1409,11 +1409,11 @@ static ssize_t read_flush(struct file *file, char __user *buf,
>>>   >  			  size_t count, loff_t *ppos,
>>>   >  			  struct cache_detail *cd)
>>>   >  {
>>>   > -	char tbuf[20];
>>>   > +	char tbuf[22];
>>>   
>>>   I wonder how common this sort of calculation is in the kernel?  It might
>>>   provide some peace of mind to be able to write this something like
>>>   
>>>   	char tbuf[MAXLEN_BASE10_UL + 2]  /* + 2 for final "\n\0" */
>>>
>>> You could use something like:
>>>
>>>     char tbuf[sizeof (unsigned long) * 24 / 10 + 1 + 2]; /* + 2 for final "\n\0" */
>>>
>>> since there are roughly 10 bits for every 3 decimal digits.
>>
>> So we could do something like this.  OK, I'm not sure I care enough.
>>
>> --b.
>>
>> diff --git a/include/linux/string.h b/include/linux/string.h
>> index e033564..ed34180 100644
>> --- a/include/linux/string.h
>> +++ b/include/linux/string.h
>> @@ -126,6 +126,10 @@ extern void argv_free(char **argv);
>>  extern bool sysfs_streq(const char *s1, const char *s2);
>>  extern int strtobool(const char *s, bool *res);
>>  
>> +/* length of the decimal representation of an unsigned integer.  Just an
>> + * approximation, but it's right for types of size 1 to 36 bytes: */
>> +#define base10len(i) (sizeof(i) * 24 / 10 + 1)
>> +
>>  #ifdef CONFIG_BINARY_PRINTF
>>  int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
>>  int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
>> diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
>> index 2afd2a8..1dcd2b3 100644
>> --- a/net/sunrpc/cache.c
>> +++ b/net/sunrpc/cache.c
>> @@ -1409,7 +1409,7 @@ static ssize_t read_flush(struct file *file, char __user *buf,
>>  			  size_t count, loff_t *ppos,
>>  			  struct cache_detail *cd)
>>  {
>> -	char tbuf[20];
>> +	char tbuf[base10len(unsigned long) + 2];
>>  	unsigned long p = *ppos;
>>  	size_t len;
>>  
>>
> 
> Learning from what happened in this specific case, there are actually 2 issues here:
> 
>  - Array size was constant and too small, which is solved by the patch above.
>  - We were blindly trying to sprintf() into that array, this issue may pop back up if someone decides to change the format string forgetting to modify the array declaration.
> 
> What about adding the following itoa() type helper:

Fat fingers :(

Something like this (which obviously needs tons of work):

#define base10len(i) (sizeof(i) * 24 / 10 + 1)
#define itoa(x) \
({ \
        static char str[base10len(x)]; \
        sprintf(str, "%lu", (x)); \
        str; \
})


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next 0/4] net/mlx4_en: Add accelerated RFS support
From: David Miller @ 2012-07-18 21:37 UTC (permalink / raw)
  To: or.gerlitz; +Cc: roland, netdev, oren, yevgenyp, amirv
In-Reply-To: <CAJZOPZJJurMrKuYCZJNUWnSy-ZOXdPfY=t=+87JVcEP2JFxpnQ@mail.gmail.com>

From: Or Gerlitz <or.gerlitz@gmail.com>
Date: Thu, 19 Jul 2012 00:36:12 +0300

> On Thu, Jul 19, 2012 at 12:34 AM, David Miller <davem@davemloft.net> wrote:
> 
>> That's basically what I said.
> 
> sure, will follow. Any further comments that we should fix in this series?

Nope, it otherwise looked good to me.

^ permalink raw reply

* Re: [PATCH v2 7/7] net-tcp: Fast Open client - cookie-less mode
From: Eric Dumazet @ 2012-07-18 21:36 UTC (permalink / raw)
  To: Yuchung Cheng; +Cc: davem, hkchu, edumazet, ncardwell, sivasankar, netdev
In-Reply-To: <1342645307-17772-8-git-send-email-ycheng@google.com>

On Wed, 2012-07-18 at 14:01 -0700, Yuchung Cheng wrote:
> In trusted networks, e.g., intranet, data-center, the client does not
> need to use Fast Open cookie to mitigate DoS attacks. In cookie-less
> mode, sendmsg() with MSG_FASTOPEN flag will send SYN-data regardless
> of cookie availability.
> 
> Signed-off-by: Yuchung Cheng <ycheng@google.com>
> ---
>  Documentation/networking/ip-sysctl.txt |    2 ++
>  include/linux/tcp.h                    |    1 +
>  include/net/tcp.h                      |    1 +
>  net/ipv4/tcp_input.c                   |    8 ++++++--
>  net/ipv4/tcp_output.c                  |    6 +++++-
>  5 files changed, 15 insertions(+), 3 deletions(-)

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [PATCH net-next 0/4] net/mlx4_en: Add accelerated RFS support
From: Or Gerlitz @ 2012-07-18 21:36 UTC (permalink / raw)
  To: David Miller; +Cc: roland, netdev, oren, yevgenyp, amirv
In-Reply-To: <20120718.143456.1078493346727092667.davem@davemloft.net>

On Thu, Jul 19, 2012 at 12:34 AM, David Miller <davem@davemloft.net> wrote:

> That's basically what I said.

sure, will follow. Any further comments that we should fix in this series?

Or.

^ permalink raw reply

* Re: [PATCH net-next V1 1/9] IB/ipoib: Add support for clones / multiple childs on the same partition
From: David Miller @ 2012-07-18 21:36 UTC (permalink / raw)
  To: or.gerlitz; +Cc: roland, netdev, ali, sean.hefty, shlomop, erezsh
In-Reply-To: <CAJZOPZ+v291u5fSxJBu6q86LSpGAUgxLkhQb-ucL+g=hXba8cQ@mail.gmail.com>

From: Or Gerlitz <or.gerlitz@gmail.com>
Date: Thu, 19 Jul 2012 00:24:58 +0300

> On Wed, Jul 18, 2012 at 9:38 PM, David Miller <davem@davemloft.net> wrote:
>> From: Or Gerlitz <ogerlitz@mellanox.com>
> 
>>> All sorts of childs are still created/deleted through sysfs, in a
>>> similar manner to the way legacy child interfaces are.
> 
>> Network device instantiation of this type is the domain of
>> rtnl_link_ops rather than ugly sysfs interfaces.
> 
> Didn't add any **new** sysfs interfaces in this patch. The IPoIB sysfs
> entries to create child devices are there from IPoIB's day one, and
> we're only extending them a tiny bit.

That's extremely unfortunate, having private ways of instantiating
networking devices leads to an extremely poor user experience.

Would you like to have to train every single user in the case
where each and every driver author makes his own unique way
of configuring his hardware?

^ permalink raw reply

* Re: [PATCH v2 6/7] net-tcp: Fast Open client - detecting SYN-data drops
From: Eric Dumazet @ 2012-07-18 21:35 UTC (permalink / raw)
  To: Yuchung Cheng; +Cc: davem, hkchu, edumazet, ncardwell, sivasankar, netdev
In-Reply-To: <1342645307-17772-7-git-send-email-ycheng@google.com>

On Wed, 2012-07-18 at 14:01 -0700, Yuchung Cheng wrote:
> On paths with firewalls dropping SYN with data or experimental TCP options,
> Fast Open connections will have experience SYN timeout and bad performance.
> The solution is to track such incidents in the cookie cache and disables
> Fast Open temporarily.
> 
> Since only the original SYN includes data and/or Fast Open option, the
> SYN-ACK has some tell-tale sign (tcp_rcv_fastopen_synack()) to detect
> such drops. If a path has recurring Fast Open SYN drops, Fast Open is
> disabled for 2^(recurring_losses) minutes starting from four minutes up to
> roughly one and half day. sendmsg with MSG_FASTOPEN flag will succeed but
> it behaves as connect() then write().
> 
> Signed-off-by: Yuchung Cheng <ycheng@google.com>
...

>  void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
> -			    struct tcp_fastopen_cookie *cookie)
> +			    struct tcp_fastopen_cookie *cookie, bool syn_lost)
>  {
>  	struct tcp_metrics_block *tm;
>  
> @@ -670,6 +675,11 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
>  		tfom->mss = mss;
>  		if (cookie->len > 0)
>  			tfom->cookie = *cookie;
> +		if (syn_lost) {
> +			++tfom->syn_loss;
> +			tfom->last_syn_loss = jiffies;
> +		} else
> +			tfom->syn_loss = 0;
>  	}
>  	rcu_read_unlock();
>  }

Proably needs a respin after you use a seqlock, otherwise looks good to
me.

^ permalink raw reply

* Re: [PATCH net-next 0/4] net/mlx4_en: Add accelerated RFS support
From: David Miller @ 2012-07-18 21:34 UTC (permalink / raw)
  To: or.gerlitz; +Cc: roland, netdev, oren, yevgenyp, amirv
In-Reply-To: <CAJZOPZL8842H9H7404qjugCPpdhijjUhxk0Kn6BfBONCbE-tNA@mail.gmail.com>

From: Or Gerlitz <or.gerlitz@gmail.com>
Date: Thu, 19 Jul 2012 00:22:42 +0300

> On Wed, Jul 18, 2012 at 9:41 PM, David Miller <davem@davemloft.net> wrote:
> 
>> Please use CONFIG_RFS_ACCEL consistently to protect this feature
>> in your driver sources.
>>
>> Using CPU_RMAP in a few places is inconsistent, and not what other
>> drivers do.
> 
> We're indeed using CONFIG_RFS_ACCEL across the place, and only in two
> places which deal directly with rmap use CPU_RMAP. The latter is
> selected by RFS_ACCEL, but if you feel this is inconsistent, will
> change to use only one define.

That's basically what I said.

^ permalink raw reply

* Re: [PATCH v2 5/7] net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)
From: Eric Dumazet @ 2012-07-18 21:30 UTC (permalink / raw)
  To: Yuchung Cheng; +Cc: davem, hkchu, edumazet, ncardwell, sivasankar, netdev
In-Reply-To: <1342645307-17772-6-git-send-email-ycheng@google.com>

On Wed, 2012-07-18 at 14:01 -0700, Yuchung Cheng wrote:
> sendmsg() (or sendto()) with MSG_FASTOPEN is a combo of connect(2)
> and write(2). The application should replace connect() with it to
> send data in the opening SYN packet.
> 

> index 25d6322..90297db 100644
> --- a/include/linux/socket.h
> +++ b/include/linux/socket.h
> @@ -276,6 +276,7 @@ struct ucred {
>  #else
>  #define MSG_CMSG_COMPAT	0		/* We never have 32 bit fixups */
>  #endif
> +#define MSG_FASTOPEN	0x20000000 /* Send data in TCP SYN */

Could you put MSG_FASTOPEN before MSG_CMSG_CLOEXEC ?

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox