Netdev List
 help / color / mirror / Atom feed
* [v2 010/115] sysctl: remove .child from dev/hpet/
From: Lucian Adrian Grijincu @ 2011-05-08 22:38 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, Lucian Adrian Grijincu
In-Reply-To: <1304894407-32201-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 drivers/char/hpet.c |   38 ++++++++++++--------------------------
 1 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 7066e80..303de7e 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -721,33 +721,19 @@ static int hpet_is_known(struct hpet_data *hdp)
 
 static ctl_table hpet_table[] = {
 	{
-	 .procname = "max-user-freq",
-	 .data = &hpet_max_freq,
-	 .maxlen = sizeof(int),
-	 .mode = 0644,
-	 .proc_handler = proc_dointvec,
-	 },
-	{}
+		.procname = "max-user-freq",
+		.data     = &hpet_max_freq,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{ }
 };
 
-static ctl_table hpet_root[] = {
-	{
-	 .procname = "hpet",
-	 .maxlen = 0,
-	 .mode = 0555,
-	 .child = hpet_table,
-	 },
-	{}
-};
-
-static ctl_table dev_root[] = {
-	{
-	 .procname = "dev",
-	 .maxlen = 0,
-	 .mode = 0555,
-	 .child = hpet_root,
-	 },
-	{}
+static const struct ctl_path hpet_path[] = {
+	{ .procname = "dev" },
+	{ .procname = "hpet" },
+	{ }
 };
 
 static struct ctl_table_header *sysctl_header;
@@ -1053,7 +1039,7 @@ static int __init hpet_init(void)
 	if (result < 0)
 		return -ENODEV;
 
-	sysctl_header = register_sysctl_table(dev_root);
+	sysctl_header = register_sysctl_paths(hpet_path, hpet_table);
 
 	result = acpi_bus_register_driver(&hpet_acpi_driver);
 	if (result < 0) {
-- 
1.7.5.134.g1c08b

^ permalink raw reply related

* [v2 009/115] sysctl: remove .child from dev/cdrom/
From: Lucian Adrian Grijincu @ 2011-05-08 22:38 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, Lucian Adrian Grijincu
In-Reply-To: <1304894407-32201-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 drivers/cdrom/cdrom.c |   22 ++++------------------
 1 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 514dd8e..9560789 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3654,26 +3654,12 @@ static ctl_table cdrom_table[] = {
 	{ }
 };
 
-static ctl_table cdrom_cdrom_table[] = {
-	{
-		.procname	= "cdrom",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= cdrom_table,
-	},
+static const struct ctl_path cdrom_root_path[] = {
+	{ .procname = "dev" },
+	{ .procname = "cdrom" },
 	{ }
 };
 
-/* Make sure that /proc/sys/dev is there */
-static ctl_table cdrom_root_table[] = {
-	{
-		.procname	= "dev",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= cdrom_cdrom_table,
-	},
-	{ }
-};
 static struct ctl_table_header *cdrom_sysctl_header;
 
 static void cdrom_sysctl_register(void)
@@ -3683,7 +3669,7 @@ static void cdrom_sysctl_register(void)
 	if (initialized == 1)
 		return;
 
-	cdrom_sysctl_header = register_sysctl_table(cdrom_root_table);
+	cdrom_sysctl_header = register_sysctl_paths(cdrom_root_path, cdrom_table);
 
 	/* set the defaults */
 	cdrom_sysctl_settings.autoclose = autoclose;
-- 
1.7.5.134.g1c08b

^ permalink raw reply related

* [PATCH 11/15] ipv4: Use inet_csk_route_child_sock() in DCCP and TCP.
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


Operation order is now transposed: we first create the child
socket, then we try to hook up the route.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c     |   19 ++++++++++---------
 net/ipv4/tcp_ipv4.c |   18 ++++++++++--------
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4ac1a72..46b15e9 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -396,15 +396,10 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 	if (sk_acceptq_is_full(sk))
 		goto exit_overflow;
 
-	if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
-		goto exit;
-
 	newsk = dccp_create_openreq_child(sk, req, skb);
 	if (newsk == NULL)
 		goto exit_nonewsk;
 
-	sk_setup_caps(newsk, dst);
-
 	newinet		   = inet_sk(newsk);
 	ireq		   = inet_rsk(req);
 	newinet->inet_daddr	= ireq->rmt_addr;
@@ -416,12 +411,15 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newinet->mc_ttl	   = ip_hdr(skb)->ttl;
 	newinet->inet_id   = jiffies;
 
+	if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
+		goto put_and_exit;
+
+	sk_setup_caps(newsk, dst);
+
 	dccp_sync_mss(newsk, dst_mtu(dst));
 
-	if (__inet_inherit_port(sk, newsk) < 0) {
-		sock_put(newsk);
-		goto exit;
-	}
+	if (__inet_inherit_port(sk, newsk) < 0)
+		goto put_and_exit;
 	__inet_hash_nolisten(newsk, NULL);
 
 	return newsk;
@@ -433,6 +431,9 @@ exit_nonewsk:
 exit:
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return NULL;
+put_and_exit:
+	sock_put(newsk);
+	goto exit;
 }
 
 EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a712171..374de3c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1421,15 +1421,11 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	if (sk_acceptq_is_full(sk))
 		goto exit_overflow;
 
-	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
-		goto exit;
-
 	newsk = tcp_create_openreq_child(sk, req, skb);
 	if (!newsk)
 		goto exit_nonewsk;
 
 	newsk->sk_gso_type = SKB_GSO_TCPV4;
-	sk_setup_caps(newsk, dst);
 
 	newtp		      = tcp_sk(newsk);
 	newinet		      = inet_sk(newsk);
@@ -1447,6 +1443,11 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 	newinet->inet_id = newtp->write_seq ^ jiffies;
 
+	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
+		goto put_and_exit;
+
+	sk_setup_caps(newsk, dst);
+
 	tcp_mtup_init(newsk);
 	tcp_sync_mss(newsk, dst_mtu(dst));
 	newtp->advmss = dst_metric_advmss(dst);
@@ -1474,10 +1475,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	}
 #endif
 
-	if (__inet_inherit_port(sk, newsk) < 0) {
-		sock_put(newsk);
-		goto exit;
-	}
+	if (__inet_inherit_port(sk, newsk) < 0)
+		goto put_and_exit;
 	__inet_hash_nolisten(newsk, NULL);
 
 	return newsk;
@@ -1489,6 +1488,9 @@ exit_nonewsk:
 exit:
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return NULL;
+put_and_exit:
+	sock_put(newsk);
+	goto exit;
 }
 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
 
-- 
1.7.5.1


^ permalink raw reply related

* [v2 008/115] sysctl: remove .child from crypto/fips_enabled
From: Lucian Adrian Grijincu @ 2011-05-08 22:38 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, Lucian Adrian Grijincu
In-Reply-To: <1304894407-32201-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 crypto/proc.c |   12 ++++--------
 1 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/crypto/proc.c b/crypto/proc.c
index 58fef67..2ef248b 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -34,20 +34,16 @@ static struct ctl_table crypto_sysctl_table[] = {
 	{}
 };
 
-static struct ctl_table crypto_dir_table[] = {
-	{
-		.procname       = "crypto",
-		.mode           = 0555,
-		.child          = crypto_sysctl_table
-	},
-	{}
+static const struct ctl_path crypto_root_path[] = {
+	{ .procname = "crypto" },
+	{ }
 };
 
 static struct ctl_table_header *crypto_sysctls;
 
 static void crypto_proc_fips_init(void)
 {
-	crypto_sysctls = register_sysctl_table(crypto_dir_table);
+	crypto_sysctls = register_sysctl_paths(crypto_root_path, crypto_sysctl_table);
 }
 
 static void crypto_proc_fips_exit(void)
-- 
1.7.5.134.g1c08b

^ permalink raw reply related

* [PATCH 10/15] ipv4: Create inet_csk_route_child_sock().
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


This is just like inet_csk_route_req() except that it operates after
we've created the new child socket.

In this way we can use the new socket's cork flow for proper route
key storage.

This will be used by DCCP and TCP child socket creation handling.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h |    3 +++
 net/ipv4/inet_connection_sock.c    |   33 +++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 6ac4e3b..4367d91 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -250,6 +250,9 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum);
 
 extern struct dst_entry* inet_csk_route_req(struct sock *sk,
 					    const struct request_sock *req);
+extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk,
+						   struct sock *newsk,
+						   const struct request_sock *req);
 
 static inline void inet_csk_reqsk_queue_add(struct sock *sk,
 					    struct request_sock *req,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 54944da..3a2ba56 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -379,6 +379,39 @@ no_route:
 }
 EXPORT_SYMBOL_GPL(inet_csk_route_req);
 
+struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
+					    struct sock *newsk,
+					    const struct request_sock *req)
+{
+	const struct inet_request_sock *ireq = inet_rsk(req);
+	struct inet_sock *newinet = inet_sk(newsk);
+	struct ip_options_rcu *opt = ireq->opt;
+	struct net *net = sock_net(sk);
+	struct flowi4 *fl4;
+	struct rtable *rt;
+
+	fl4 = &newinet->cork.fl.u.ip4;
+	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+			   sk->sk_protocol, inet_sk_flowi_flags(sk),
+			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
+			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+	security_req_classify_flow(req, flowi4_to_flowi(fl4));
+	rt = ip_route_output_flow(net, fl4, sk);
+	if (IS_ERR(rt))
+		goto no_route;
+	if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
+		goto route_err;
+	return &rt->dst;
+
+route_err:
+	ip_rt_put(rt);
+no_route:
+	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
+
 static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
 				 const u32 rnd, const u32 synq_hsize)
 {
-- 
1.7.5.1


^ permalink raw reply related

* [v2 006/115] sysctl: remove .child from kernel/vsyscall64 (x86)
From: Lucian Adrian Grijincu @ 2011-05-08 22:38 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, Lucian Adrian Grijincu
In-Reply-To: <1304894407-32201-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 arch/x86/kernel/vsyscall_64.c |   25 ++++++++++++++-----------
 1 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c..7d8b83d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -234,18 +234,21 @@ static long __vsyscall(3) venosys_1(void)
 }
 
 #ifdef CONFIG_SYSCTL
-static ctl_table kernel_table2[] = {
-	{ .procname = "vsyscall64",
-	  .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
-	  .mode = 0644,
-	  .proc_handler = proc_dointvec },
-	{}
+static ctl_table vsyscall64_table[] = {
+	{
+		.procname = "vsyscall64",
+		.data     = &vsyscall_gtod_data.sysctl_enabled,
+		.maxlen   = sizeof(int),
+		.mode     = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{ }
 };
 
-static ctl_table kernel_root_table2[] = {
-	{ .procname = "kernel", .mode = 0555,
-	  .child = kernel_table2 },
-	{}
+
+static struct ctl_path kernel_root_path[] = {
+	{ .procname = "kernel" },
+	{ }
 };
 #endif
 
@@ -303,7 +306,7 @@ static int __init vsyscall_init(void)
 	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
 	BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
 #ifdef CONFIG_SYSCTL
-	register_sysctl_table(kernel_root_table2);
+	register_sysctl_paths(kernel_root_path, vsyscall64_table);
 #endif
 	on_each_cpu(cpu_vsyscall_init, NULL, 1);
 	/* notifier priority > KVM */
-- 
1.7.5.134.g1c08b

^ permalink raw reply related

* [v2 005/115] sysctl: remove .child from dev/parport/PORT/devices/
From: Lucian Adrian Grijincu @ 2011-05-08 22:38 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, Lucian Adrian Grijincu
In-Reply-To: <1304894407-32201-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 drivers/parport/procfs.c |   42 ++++++++++++++++++++++++++++--------------
 1 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index e55b9b6..3bb5bed 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -248,6 +248,7 @@ PARPORT_MAX_SPINTIME_VALUE;
 
 struct parport_sysctl_table {
 	struct ctl_table_header *sysctl_header;
+	struct ctl_table_header *devices_sysctl_header;
 	ctl_table vars[12];
 	ctl_table device_dir[2];
 };
@@ -291,11 +292,6 @@ static const struct parport_sysctl_table parport_sysctl_template = {
 			.mode		= 0444,
 			.proc_handler	= do_hardware_modes
 		},
-		{
-			.procname = "devices",
-			.mode = 0555,
-			.child = NULL, /* child will point to .device_dir */
-		},
 #ifdef CONFIG_PARPORT_1284
 		{
 			.procname	= "autoprobe",
@@ -378,6 +374,14 @@ int parport_proc_register(struct parport *port)
 		{ .procname = port->name },
 		{  },
 	};
+	struct ctl_path parport_port_devices_path[] = {
+		{ .procname = "dev" },
+		{ .procname = "parport" },
+		{ .procname = port->name },
+		{ .procname = "devices" },
+		{  },
+	};
+
 	struct parport_sysctl_table *t;
 	int i;
 
@@ -392,20 +396,29 @@ int parport_proc_register(struct parport *port)
 		t->vars[i].extra1 = port;
 
 	t->vars[0].data = &port->spintime;
-	
-	for (i = 0; i < 5; i++)
-		t->vars[6 + i].extra2 = &port->probe_info[i];
 
-	t->vars[5].child = t->device_dir;
-	/* vars[5].procname is the 'devices' dir entry */
+#ifdef CONFIG_PARPORT_1284
+	for (i = 0; i < 5; i++)
+		t->vars[5 + i].extra2 = &port->probe_info[i];
+#endif /* CONFIG_PARPORT_1284 */
 
 	t->sysctl_header = register_sysctl_paths(parport_port_path, t->vars);
-	if (t->sysctl_header == NULL) {
-		kfree(t);
-		t = NULL;
-	}
+	if (t->sysctl_header == NULL)
+		goto fail_register_port;
+
+	t->devices_sysctl_header = register_sysctl_paths(parport_port_devices_path,
+							 t->device_dir);
+	if (t->devices_sysctl_header == NULL)
+		goto fail_register_devices;
 	port->sysctl_table = t;
 	return 0;
+
+fail_register_devices:
+	unregister_sysctl_table(t->sysctl_header);
+fail_register_port:
+	kfree(t);
+
+	return -ENOMEM;
 }
 
 int parport_proc_unregister(struct parport *port)
@@ -413,6 +426,7 @@ int parport_proc_unregister(struct parport *port)
 	if (port->sysctl_table) {
 		struct parport_sysctl_table *t = port->sysctl_table;
 		port->sysctl_table = NULL;
+		unregister_sysctl_table(t->devices_sysctl_header);
 		unregister_sysctl_table(t->sysctl_header);
 		kfree(t);
 	}
-- 
1.7.5.134.g1c08b

^ permalink raw reply related

* [v2 004/115] sysctl: remove .child from dev/parport/PORT/
From: Lucian Adrian Grijincu @ 2011-05-08 22:38 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, Lucian Adrian Grijincu
In-Reply-To: <1304894407-32201-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 drivers/parport/procfs.c |   48 ++++++++++++++-------------------------------
 1 files changed, 15 insertions(+), 33 deletions(-)

diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index 350233e..e55b9b6 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -233,13 +233,6 @@ static int do_hardware_modes (ctl_table *table, int write,
 	return copy_to_user(result, buffer, len) ? -EFAULT : 0;
 }
 
-#define PARPORT_PORT_DIR(CHILD) { .procname = NULL, .mode = 0555, .child = CHILD }
-#define PARPORT_PARPORT_DIR(CHILD) { .procname = "parport", \
-                                     .mode = 0555, .child = CHILD }
-#define PARPORT_DEV_DIR(CHILD) { .procname = "dev", .mode = 0555, .child = CHILD }
-#define PARPORT_DEVICES_ROOT_DIR  {  .procname = "devices", \
-                                    .mode = 0555, .child = NULL }
-
 static const unsigned long parport_min_timeslice_value =
 PARPORT_MIN_TIMESLICE_VALUE;
 
@@ -257,14 +250,10 @@ struct parport_sysctl_table {
 	struct ctl_table_header *sysctl_header;
 	ctl_table vars[12];
 	ctl_table device_dir[2];
-	ctl_table port_dir[2];
-	ctl_table parport_dir[2];
-	ctl_table dev_dir[2];
 };
 
 static const struct parport_sysctl_table parport_sysctl_template = {
-	.sysctl_header = NULL,
-        {
+        .vars = {
 		{
 			.procname	= "spintime",
 			.data		= NULL,
@@ -302,7 +291,11 @@ static const struct parport_sysctl_table parport_sysctl_template = {
 			.mode		= 0444,
 			.proc_handler	= do_hardware_modes
 		},
-		PARPORT_DEVICES_ROOT_DIR,
+		{
+			.procname = "devices",
+			.mode = 0555,
+			.child = NULL, /* child will point to .device_dir */
+		},
 #ifdef CONFIG_PARPORT_1284
 		{
 			.procname	= "autoprobe",
@@ -342,7 +335,7 @@ static const struct parport_sysctl_table parport_sysctl_template = {
 #endif /* IEEE 1284 support */
 		{}
 	},
-	{
+	.device_dir = {
 		{
 			.procname	= "active",
 			.data		= NULL,
@@ -352,18 +345,6 @@ static const struct parport_sysctl_table parport_sysctl_template = {
 		},
 		{}
 	},
-	{
-		PARPORT_PORT_DIR(NULL),
-		{}
-	},
-	{
-		PARPORT_PARPORT_DIR(NULL),
-		{}
-	},
-	{
-		PARPORT_DEV_DIR(NULL),
-		{}
-	}
 };
 
 struct parport_device_sysctl_table
@@ -391,6 +372,12 @@ parport_device_sysctl_template = {
 
 int parport_proc_register(struct parport *port)
 {
+	struct ctl_path parport_port_path[] = {
+		{ .procname = "dev" },
+		{ .procname = "parport" },
+		{ .procname = port->name },
+		{  },
+	};
 	struct parport_sysctl_table *t;
 	int i;
 
@@ -409,15 +396,10 @@ int parport_proc_register(struct parport *port)
 	for (i = 0; i < 5; i++)
 		t->vars[6 + i].extra2 = &port->probe_info[i];
 
-	t->port_dir[0].procname = port->name;
-
-	t->dev_dir[0].child = t->parport_dir;
-	t->parport_dir[0].child = t->port_dir;
-	t->port_dir[0].child = t->vars;
 	t->vars[5].child = t->device_dir;
-	/* vars[5] = PARPORT_DEVICES_ROOT_DIR => .procname = 'devices' */
+	/* vars[5].procname is the 'devices' dir entry */
 
-	t->sysctl_header = register_sysctl_table(t->dev_dir);
+	t->sysctl_header = register_sysctl_paths(parport_port_path, t->vars);
 	if (t->sysctl_header == NULL) {
 		kfree(t);
 		t = NULL;
-- 
1.7.5.134.g1c08b

^ permalink raw reply related

* [PATCH 9/15] sctp: Store a flowi in transports to provide persistent keying.
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


Several future simplifications are possible now because of this.

For example, the sctp_addr unions can simply refer directly to
the flowi information.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |    1 +
 net/sctp/transport.c       |    9 +++------
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ff3e8cc..795f488 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -894,6 +894,7 @@ struct sctp_transport {
 		/* Is this structure kfree()able? */
 		malloced:1;
 
+	struct flowi fl;
 
 	/* This is the peer's IP address and port. */
 	union sctp_addr ipaddr;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index d8595dd..394c57c 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -213,13 +213,11 @@ void sctp_transport_set_owner(struct sctp_transport *transport,
 /* Initialize the pmtu of a transport. */
 void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 {
-	struct flowi fl;
-
 	/* If we don't have a fresh route, look one up */
 	if (!transport->dst || transport->dst->obsolete > 1) {
 		dst_release(transport->dst);
 		transport->af_specific->get_dst(transport, &transport->saddr,
-					      &fl, sk);
+						&transport->fl, sk);
 	}
 
 	if (transport->dst) {
@@ -274,14 +272,13 @@ void sctp_transport_route(struct sctp_transport *transport,
 {
 	struct sctp_association *asoc = transport->asoc;
 	struct sctp_af *af = transport->af_specific;
-	struct flowi fl;
 
-	af->get_dst(transport, saddr, &fl, sctp_opt2sk(opt));
+	af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt));
 
 	if (saddr)
 		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
 	else
-		af->get_saddr(opt, transport, &fl);
+		af->get_saddr(opt, transport, &transport->fl);
 
 	if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
 		return;
-- 
1.7.5.1


^ permalink raw reply related

* [v2 003/115] sysctl: remove .child from dev/parport/PORT/devices/DEVICE
From: Lucian Adrian Grijincu @ 2011-05-08 22:38 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, Lucian Adrian Grijincu
In-Reply-To: <1304894407-32201-1-git-send-email-lucian.grijincu@gmail.com>

MAINTAINERS says parport is "Orphan" and I don't have a parallel
port => I cannot test that this patch works.

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 drivers/parport/procfs.c |   56 ++++++++++------------------------------------
 1 files changed, 12 insertions(+), 44 deletions(-)

diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index edeb012..350233e 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -370,17 +370,11 @@ struct parport_device_sysctl_table
 {
 	struct ctl_table_header *sysctl_header;
 	ctl_table vars[2];
-	ctl_table device_dir[2];
-	ctl_table devices_root_dir[2];
-	ctl_table port_dir[2];
-	ctl_table parport_dir[2];
-	ctl_table dev_dir[2];
 };
 
 static const struct parport_device_sysctl_table
 parport_device_sysctl_template = {
-	.sysctl_header = NULL,
-	{
+	.vars = {
 		{
 			.procname 	= "timeslice",
 			.data		= NULL,
@@ -391,32 +385,6 @@ parport_device_sysctl_template = {
 			.extra2		= (void*) &parport_max_timeslice_value
 		},
 	},
-	{
-		{
-			.procname	= NULL,
-			.data		= NULL,
-			.maxlen		= 0,
-			.mode		= 0555,
-			.child		= NULL
-		},
-		{}
-	},
-	{
-		PARPORT_DEVICES_ROOT_DIR,
-		{}
-	},
-	{
-		PARPORT_PORT_DIR(NULL),
-		{}
-	},
-	{
-		PARPORT_PARPORT_DIR(NULL),
-		{}
-	},
-	{
-		PARPORT_DEV_DIR(NULL),
-		{}
-	}
 };
 
 
@@ -473,24 +441,24 @@ int parport_device_proc_register(struct pardevice *device)
 {
 	struct parport_device_sysctl_table *t;
 	struct parport * port = device->port;
-	
+	struct ctl_path parport_devices_port_path[] = {
+		{ .procname = "dev" },
+		{ .procname = "parport" },
+		{ .procname = port->name },
+		{ .procname = "devices" },
+		{ .procname = device->name },
+		{  },
+	};
+
 	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
 		return -ENOMEM;
 	memcpy(t, &parport_device_sysctl_template, sizeof(*t));
 
-	t->port_dir[0].procname = port->name;
-	t->device_dir[0].procname = device->name;
-
-	t->dev_dir[0].child = t->parport_dir;
-	t->parport_dir[0].child = t->port_dir;
-	t->port_dir[0].child = t->devices_root_dir;
-	t->devices_root_dir[0].child = t->device_dir;
-	t->device_dir[0].child = t->vars;
-
 	t->vars[0].data = &device->timeslice;
 
-	t->sysctl_header = register_sysctl_table(t->dev_dir);
+	t->sysctl_header = register_sysctl_paths(parport_devices_port_path,
+						 t->vars);
 	if (t->sysctl_header == NULL) {
 		kfree(t);
 		t = NULL;
-- 
1.7.5.134.g1c08b

^ permalink raw reply related

* [v2 002/115] sysctl: parport: reorder .child assignments to simplify review
From: Lucian Adrian Grijincu @ 2011-05-08 22:38 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, Lucian Adrian Grijincu
In-Reply-To: <1304894407-32201-1-git-send-email-lucian.grijincu@gmail.com>

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
 drivers/parport/procfs.c |   14 ++++++++------
 1 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index 89b8b71..edeb012 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -437,16 +437,17 @@ int parport_proc_register(struct parport *port)
 		t->vars[i].extra1 = port;
 
 	t->vars[0].data = &port->spintime;
-	t->vars[5].child = t->device_dir;
 	
 	for (i = 0; i < 5; i++)
 		t->vars[6 + i].extra2 = &port->probe_info[i];
 
 	t->port_dir[0].procname = port->name;
 
-	t->port_dir[0].child = t->vars;
-	t->parport_dir[0].child = t->port_dir;
 	t->dev_dir[0].child = t->parport_dir;
+	t->parport_dir[0].child = t->port_dir;
+	t->port_dir[0].child = t->vars;
+	t->vars[5].child = t->device_dir;
+	/* vars[5] = PARPORT_DEVICES_ROOT_DIR => .procname = 'devices' */
 
 	t->sysctl_header = register_sysctl_table(t->dev_dir);
 	if (t->sysctl_header == NULL) {
@@ -478,14 +479,15 @@ int parport_device_proc_register(struct pardevice *device)
 		return -ENOMEM;
 	memcpy(t, &parport_device_sysctl_template, sizeof(*t));
 
+	t->port_dir[0].procname = port->name;
+	t->device_dir[0].procname = device->name;
+
 	t->dev_dir[0].child = t->parport_dir;
 	t->parport_dir[0].child = t->port_dir;
-	t->port_dir[0].procname = port->name;
 	t->port_dir[0].child = t->devices_root_dir;
 	t->devices_root_dir[0].child = t->device_dir;
-
-	t->device_dir[0].procname = device->name;
 	t->device_dir[0].child = t->vars;
+
 	t->vars[0].data = &device->timeslice;
 
 	t->sysctl_header = register_sysctl_table(t->dev_dir);
-- 
1.7.5.134.g1c08b

^ permalink raw reply related

* [v2 000/115] faster tree-based sysctl implementation
From: Lucian Adrian Grijincu @ 2011-05-08 22:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: netdev, Lucian Adrian Grijincu, Eric W . Biederman,
	Alexey Dobriyan, Octavian Purdila, David S . Miller

This patch series introduces a faster/leaner sysctl internal implementation:

$ time modprobe dummy numdummies=N

Without this patch series :(
- ipv4 only
  -  N=1000  time= 0m 06s
  -  N=2000  time= 0m 30s
  -  N=4000  time= 2m 35s
- ipv4 and ipv6
  -  N=1000  time= 0m 24s
  -  N=2000  time= 2m 14s
  -  N=4000  time=10m 16s
  -  N=5000  time=16m  3s

With this patch series    :)
- ipv4 only
  -  N=1000  time= 0m  0.33s
  -  N=2000  time= 0m  1.25s
  -  N=4000  time= 0m  5.31s
- ipv4 and ipv6
  -  N=1000  time= 0m  0.41s
  -  N=2000  time= 0m  1.62s
  -  N=4000  time= 0m  7.64s
  -  N=5000  time= 0m 12.35s
  -  N=8000  time= 0m 36.95s


Since v1 (http://thread.gmane.org/gmane.linux.kernel/1133667):
- rebased on top of 2.6.39-rc6
- split the patch that adds the new algorithm and data structures.
- fixed a few bugs lingering in the old code
- shrank a reference counter
- added a new reference counter to maintain ownership information
- added method to register an empty sysctl dir and converted some users
- added checks enforcing the rule that a non-netns specific directory may
  not be registered after a netns specific one has already been registered.
- added cookie support: register a piece of data with the header to be
  used to make simple conversions on the ctl_table. This saves memory where
  we need to register sysctl tables with the same content affecting
  different pieces of data.
- enforced sysctl checks


Eric also asked for:
- registration based on strings, not the ctl_path version
  -- I did not add this at the moment because of lack of time and,
     if needed, this can be added any time later. The patch series
     is long enough.

- replacing the per-header list of subdirs with a rbtree.
  -- Again, lack of time, and this can always be added at a later time
     to optimize lookup and duplicate checks. At the moment this patch
     series does not add a complexity regression over the previous
     implementation, au contraire.


For anyone interested in testing these patches check them out from:

  web:   https://github.com/luciang/linux-2.6-new-sysctl
  git:   git://github.com/luciang/linux-2.6-new-sysctl.git


Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Octavian Purdila <tavi@cs.pub.ro>
Cc: "David S . Miller" <davem@davemloft.net>

Lucian Adrian Grijincu (115):
  sysctl: remove .child from dev/parport/default
  sysctl: parport: reorder .child assignments to simplify review
  sysctl: remove .child from dev/parport/PORT/devices/DEVICE
  sysctl: remove .child from dev/parport/PORT/
  sysctl: remove .child from dev/parport/PORT/devices/
  sysctl: remove .child from kernel/vsyscall64 (x86)
  sysctl: remove .child from abi/vsyscall32 (x86)
  sysctl: remove .child from crypto/fips_enabled
  sysctl: remove .child from dev/cdrom/
  sysctl: remove .child from dev/hpet/
  sysctl: remove .child from dev/ipmi/
  sysctl: remove .child from dev/rtc/
  sysctl: remove .child from dev/mac_hid/
  sysctl: remove .child from dev/raid/
  sysctl: remove .child from xpc/
  sysctl: remove .child from xpc/hb
  sysctl: remove .child from kernel/sclp (s390)
  sysctl: remove .child from dev/scsi
  sysctl: remove .child from kernel/pty
  sysctl: remove .child from coda/
  sysctl: remove .child from fscache/
  sysctl: remove .child from fs/nfs/ nlm_table table
  sysctl: remove .child from fs/nfs/ nfs_cb_table
  sysctl: remove .child from fs/ntfs-debug
  sysctl: remove .child from fs/ocfs2/nm/
  sysctl: remove .child from fs/quota/
  sysctl: remove .child from fs/xfs/
  sysctl: remove .child from kernel/ (ipc)
  sysctl: remove .child from fs/mqueue
  sysctl: sched: add sd_table_template
  sysctl: remove .child from kernel/sched_domain/cpuX/domainY/
  sysctl: remove .child from kernel/ (utsname)
  sysctl: remove .child from sunrpc/
  sysctl: remove .child from sunrpc/svc_rdma
  sysctl: remove .child from sunrpc/ (xprtrdma)
  sysctl: remove .child from sunrpc/ (xprtsock)
  sysctl: remove .child from bus/isa/ (arm)
  sysctl: remove .child from reboot/warm (arm)
  sysctl: remove .child from lasat/ (mips)
  sysctl: remove .child from appldata/ (s390)
  sysctl: remove .child from s390dbf/
  sysctl: remove .child from vm/ (s390)
  sysctl: remove .child from kernel/perfmon/ (ia64)
  sysctl: remove .child from kernel/ (ia64/kdump)
  sysctl: remove .child from kernel/powersave-nap (powerpc)
  sysctl: remove .child from pm/ (frv)
  sysctl: remove .child from frv/
  sysctl: remove .child from sh64/unaligned_fixup/
  sysctl: delete unused register_sysctl_table function
  sysctl: remove .child from ax25 table
  sysctl: remove .child from net/ipv4/route and net/ipv4/neigh tables
  sysctl: remove .child from net/ipv4/neigh table
  sysctl: remove .child from net/ipv6/route, net/ipv6/icmp, net/ipv6
    tables
  sysctl: remove .child from net/llc tables
  sysctl: call sysctl_init before the first sysctl registration
  sysctl: no-child: manually register kernel/random
  sysctl: no-child: manually register kernel/keys
  sysctl: no-child: manually register fs/inotify
  sysctl: no-child: manually register fs/epoll
  sysctl: no-child: manually register root tables
  sysctl: faster reimplementation of sysctl_check_table
  sysctl: remove useless ctl_table->parent field
  sysctl: simplify find_in_table
  sysctl: sysctl_head_grab defaults to root header on NULL
  sysctl: delete useless grab_header function
  sysctl: rename ->used to ->ctl_use_refs
  sysctl: rename sysctl_head_grab/finish to sysctl_use_header/unuse
  sysctl: rename sysctl_head_next to sysctl_use_next_header
  sysctl: split ->count into ctl_procfs_refs and ctl_header_refs
  sysctl: rename sysctl_head_get/put to sysctl_proc_inode_get/put
  sysctl: rename (un)use_table to __sysctl_(un)use_header
  sysctl: simplify ->permissions hook
  sysctl: group root-specific operations
  sysctl: introduce ctl_table_group
  sysctl: move removal from list out of start_unregistering
  sysctl: faster tree-based sysctl implementation
  sysctl: add duplicate entry and sanity ctl_table checks
  sysctl: alloc ctl_table_header with kmem_cache
  sysctl: single subheader path: optimisation for paths used only once
  sysctl: single subheader path: net/ipv4/conf/DEVICE-NAME/
  sysctl: single subheader path: net/{ipv4|ipv6}/neigh/DEV/
  sysctl: single subheader path: net/ipv6/conf/DEVICE-NAME/
  sysctl: single subheader path: dev/parport/PORT/devices/DEVICE/
  sysctl: single subheader path: net/ax25/DEVICE
  sysctl: single subheader path: kernel/sched_domain/CPU/DOMAIN/
  sysctl: single subheader path: net/decnet/conf/DEVNAME
  sysctl: check netns-specific registration order respected
  RFC: sysctl: convert read-write lock to RCU
  RFC: sysctl: change type of ctl_procfs_refs to u8
  sysctl: warn if registration/unregistration order is not respected
  sysctl: add register_sysctl_dir: register an empty sysctl directory
  sysctl: sched: create empty dir with register_sysctl_dir
  sysctl: ax25: create empty dir with register_sysctl_dir
  sysctl: net/core: create empty dir with register_sysctl_dir
  sysctl: net/ipv4/neigh: create empty dir with register_sysctl_dir
  sysctl: net/ipv6/neigh: create empty dir with register_sysctl_dir
  sysctl: add ctl_cookie
  sysctl: add cookie to __register_sysctl_paths
  sysctl: add register_net_sysctl_table_net_cookie
  sysctl: cookie: share ip4_frags_ns_ctl_table between nets
  sysctl: cookie: share netns_core_table between nets
  sysctl: cookie: share ipv4_net_table between nets
  sysctl: cookie: share ip6_frags_ns_ctl_table between nets
  sysctl: cookie: share ipv6_route_table/ipv6_icmp_table between nets
  sysctl: cookie: share ipv6_bindv6only_table between nets
  sysctl: cookie: share acct_sysctl_table table between nets
  sysctl: cookie: share event_sysctl_table between nets
  net: split nf_ct_sysctl_table
  sysctl: cookie: share nf_ct_sysctl_table between nets
  sysctl: cookie: share unix_table between nets
  sysctl: cookie: share xfrm_table between nets
  sysctl: cookie: add register_net_sysctl_table_custom_cookie
  sysctl: cookie: share devinet tables between network devices
  sysctl: cookie: share addrconf tables between network devices
  RFC: sysctl: always perform sysctl checks

 arch/arm/kernel/isa.c                   |   31 +-
 arch/arm/mach-bcmring/arch.c            |   25 +-
 arch/frv/kernel/pm.c                    |   10 +-
 arch/frv/kernel/sysctl.c                |   12 +-
 arch/ia64/kernel/crash.c                |   13 +-
 arch/ia64/kernel/perfmon.c              |   23 +-
 arch/mips/lasat/sysctl.c                |   13 +-
 arch/powerpc/kernel/idle.c              |   13 +-
 arch/s390/appldata/appldata_base.c      |   42 +-
 arch/s390/kernel/debug.c                |   13 +-
 arch/s390/mm/cmm.c                      |   11 +-
 arch/sh/kernel/traps_64.c               |   21 +-
 arch/x86/kernel/vsyscall_64.c           |   25 +-
 arch/x86/vdso/vdso32-setup.c            |   14 +-
 crypto/proc.c                           |   12 +-
 drivers/cdrom/cdrom.c                   |   22 +-
 drivers/char/hpet.c                     |   38 +-
 drivers/char/ipmi/ipmi_poweroff.c       |   16 +-
 drivers/char/random.c                   |   27 +-
 drivers/char/rtc.c                      |   24 +-
 drivers/macintosh/mac_hid.c             |   26 +-
 drivers/md/md.c                         |   22 +-
 drivers/misc/sgi-xp/xpc_main.c          |   81 ++--
 drivers/parport/procfs.c                |  231 ++++-----
 drivers/s390/char/sclp_async.c          |   13 +-
 drivers/scsi/scsi_sysctl.c              |   28 +-
 drivers/tty/pty.c                       |   23 +-
 fs/coda/sysctl.c                        |   12 +-
 fs/eventpoll.c                          |   22 +-
 fs/fscache/main.c                       |   15 +-
 fs/lockd/svc.c                          |   22 +-
 fs/nfs/sysctl.c                         |   22 +-
 fs/notify/inotify/inotify_user.c        |   22 +-
 fs/ntfs/sysctl.c                        |   15 +-
 fs/ocfs2/stackglue.c                    |   36 +-
 fs/proc/inode.c                         |    2 +-
 fs/proc/proc_sysctl.c                   |  217 +++++---
 fs/quota/dquot.c                        |   21 +-
 fs/xfs/linux-2.6/xfs_sysctl.c           |   22 +-
 include/linux/inetdevice.h              |    6 +-
 include/linux/inotify.h                 |    2 -
 include/linux/ipv6.h                    |    6 +-
 include/linux/key.h                     |    4 +-
 include/linux/poll.h                    |    2 -
 include/linux/sysctl.h                  |  227 ++++++---
 include/net/ax25.h                      |   10 +-
 include/net/ipv6.h                      |    8 +-
 include/net/net_namespace.h             |    7 +-
 include/net/netns/conntrack.h           |    1 +
 include/net/netns/ipv6.h                |    4 +-
 init/main.c                             |    1 +
 ipc/ipc_sysctl.c                        |   12 +-
 ipc/mq_sysctl.c                         |   24 +-
 kernel/Makefile                         |    5 +-
 kernel/sched.c                          |  389 +++++++++----
 kernel/sysctl.c                         |  920 ++++++++++++++++++++-----------
 kernel/sysctl_check.c                   |  322 +++++++-----
 kernel/utsname_sysctl.c                 |   14 +-
 lib/Kconfig.debug                       |    8 -
 net/ax25/af_ax25.c                      |   22 +-
 net/ax25/ax25_dev.c                     |   10 +-
 net/ax25/sysctl_net_ax25.c              |   82 +--
 net/core/neighbour.c                    |    8 +-
 net/core/sysctl_net_core.c              |   33 +-
 net/decnet/dn_dev.c                     |    8 +-
 net/ipv4/devinet.c                      |  154 +++---
 net/ipv4/ip_fragment.c                  |   28 +-
 net/ipv4/route.c                        |   17 +-
 net/ipv4/sysctl_net_ipv4.c              |   40 +--
 net/ipv6/addrconf.c                     |  506 +++++++++---------
 net/ipv6/icmp.c                         |   18 +-
 net/ipv6/reassembly.c                   |   34 +-
 net/ipv6/route.c                        |   36 +-
 net/ipv6/sysctl_net_ipv6.c              |  118 ++---
 net/llc/sysctl_net_llc.c                |   55 +-
 net/netfilter/nf_conntrack_acct.c       |   24 +-
 net/netfilter/nf_conntrack_ecache.c     |   26 +-
 net/netfilter/nf_conntrack_standalone.c |   52 +-
 net/sunrpc/sysctl.c                     |   19 +-
 net/sunrpc/xprtrdma/svc_rdma.c          |   26 +-
 net/sunrpc/xprtrdma/transport.c         |   14 +-
 net/sunrpc/xprtsock.c                   |   16 +-
 net/sysctl_net.c                        |   95 ++--
 net/unix/sysctl_net_unix.c              |   23 +-
 net/xfrm/xfrm_sysctl.c                  |   29 +-
 security/keys/key.c                     |    1 +
 security/keys/sysctl.c                  |   18 +-
 87 files changed, 2436 insertions(+), 2305 deletions(-)

-- 
1.7.5.134.g1c08b

^ permalink raw reply

* [PATCH 8/15] ipv4: Use cork flow in ip_queue_xmit()
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


All invokers of ip_queue_xmit() must make certain that the
socket is locked.  All of SCTP, TCP, DCCP, and L2TP now make
sure this is the case.

Therefore we can use the cork flow during output route lookup in
ip_queue_xmit() when the socket route check fails.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0a2f49a..4ba26d4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -317,6 +317,7 @@ int ip_queue_xmit(struct sk_buff *skb)
 	struct sock *sk = skb->sk;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_options_rcu *inet_opt;
+	struct flowi4 *fl4;
 	struct rtable *rt;
 	struct iphdr *iph;
 	int res;
@@ -331,9 +332,9 @@ int ip_queue_xmit(struct sk_buff *skb)
 		goto packet_routed;
 
 	/* Make sure we can route this packet. */
+	fl4 = &inet->cork.fl.u.ip4;
 	rt = (struct rtable *)__sk_dst_check(sk, 0);
 	if (rt == NULL) {
-		struct flowi4 fl4;
 		__be32 daddr;
 
 		/* Use correct destination address if we have options. */
@@ -345,7 +346,7 @@ int ip_queue_xmit(struct sk_buff *skb)
 		 * keep trying until route appears or the connection times
 		 * itself out.
 		 */
-		rt = ip_route_output_ports(sock_net(sk), &fl4, sk,
+		rt = ip_route_output_ports(sock_net(sk), fl4, sk,
 					   daddr, inet->inet_saddr,
 					   inet->inet_dport,
 					   inet->inet_sport,
-- 
1.7.5.1


^ permalink raw reply related

* [PATCH 7/15] ipv4: Use cork flow in inet_sk_{reselect_saddr,rebuild_header}()
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


These two functions must be invoked only when the socket is locked
(because socket identity modifications are made non-atomically).

Therefore we can use the cork flow for output route lookups.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c |   12 +++++++-----
 1 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7b91fa8..851aa05 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1103,7 +1103,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	struct inet_sock *inet = inet_sk(sk);
 	__be32 old_saddr = inet->inet_saddr;
 	__be32 daddr = inet->inet_daddr;
-	struct flowi4 fl4;
+	struct flowi4 *fl4;
 	struct rtable *rt;
 	__be32 new_saddr;
 	struct ip_options_rcu *inet_opt;
@@ -1114,7 +1114,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 		daddr = inet_opt->opt.faddr;
 
 	/* Query new route. */
-	rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk),
+	fl4 = &inet->cork.fl.u.ip4;
+	rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
 			      sk->sk_bound_dev_if, sk->sk_protocol,
 			      inet->inet_sport, inet->inet_dport, sk, false);
 	if (IS_ERR(rt))
@@ -1122,7 +1123,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 
 	sk_setup_caps(sk, &rt->dst);
 
-	new_saddr = fl4.saddr;
+	new_saddr = fl4->saddr;
 
 	if (new_saddr == old_saddr)
 		return 0;
@@ -1152,7 +1153,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 	struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
 	__be32 daddr;
 	struct ip_options_rcu *inet_opt;
-	struct flowi4 fl4;
+	struct flowi4 *fl4;
 	int err;
 
 	/* Route is OK, nothing to do. */
@@ -1166,7 +1167,8 @@ int inet_sk_rebuild_header(struct sock *sk)
 	if (inet_opt && inet_opt->opt.srr)
 		daddr = inet_opt->opt.faddr;
 	rcu_read_unlock();
-	rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, inet->inet_saddr,
+	fl4 = &inet->cork.fl.u.ip4;
+	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
 				   inet->inet_dport, inet->inet_sport,
 				   sk->sk_protocol, RT_CONN_FLAGS(sk),
 				   sk->sk_bound_dev_if);
-- 
1.7.5.1


^ permalink raw reply related

* [PATCH 6/15] ipv4: Lock socket and use cork flow in ip4_datagram_connect().
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


This is to make sure that an l2tp socket's inet cork flow is
fully filled in, when it's encapsulated in UDP.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/datagram.c |   23 +++++++++++++++--------
 1 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index d5a2e69..424fafb 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -24,7 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
-	struct flowi4 fl4;
+	struct flowi4 *fl4;
 	struct rtable *rt;
 	__be32 saddr;
 	int oif;
@@ -39,6 +39,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	sk_dst_reset(sk);
 
+	lock_sock(sk);
+
 	oif = sk->sk_bound_dev_if;
 	saddr = inet->inet_saddr;
 	if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
@@ -47,7 +49,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		if (!saddr)
 			saddr = inet->mc_addr;
 	}
-	rt = ip_route_connect(&fl4, usin->sin_addr.s_addr, saddr,
+	fl4 = &inet->cork.fl.u.ip4;
+	rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
 			      RT_CONN_FLAGS(sk), oif,
 			      sk->sk_protocol,
 			      inet->inet_sport, usin->sin_port, sk, true);
@@ -55,26 +58,30 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		err = PTR_ERR(rt);
 		if (err == -ENETUNREACH)
 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
-		return err;
+		goto out;
 	}
 
 	if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
 		ip_rt_put(rt);
-		return -EACCES;
+		err = -EACCES;
+		goto out;
 	}
 	if (!inet->inet_saddr)
-		inet->inet_saddr = fl4.saddr;	/* Update source address */
+		inet->inet_saddr = fl4->saddr;	/* Update source address */
 	if (!inet->inet_rcv_saddr) {
-		inet->inet_rcv_saddr = fl4.saddr;
+		inet->inet_rcv_saddr = fl4->saddr;
 		if (sk->sk_prot->rehash)
 			sk->sk_prot->rehash(sk);
 	}
-	inet->inet_daddr = fl4.daddr;
+	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
 	sk_dst_set(sk, &rt->dst);
-	return 0;
+	err = 0;
+out:
+	release_sock(sk);
+	return err;
 }
 EXPORT_SYMBOL(ip4_datagram_connect);
-- 
1.7.5.1


^ permalink raw reply related

* [PATCH 5/15] l2tp: Use cork flow in l2tp_ip_connect() and l2tp_ip_sendmsg()
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


Now that the socket is consistently locked in these two routines,
this transformation is legal.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c |   16 +++++++++-------
 1 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index bd0cc0b..1ca7489 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -298,7 +298,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 {
 	struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
 	struct inet_sock *inet = inet_sk(sk);
-	struct flowi4 fl4;
+	struct flowi4 *fl4;
 	struct rtable *rt;
 	__be32 saddr;
 	int oif, rc;
@@ -322,7 +322,8 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
 		goto out;
 
-	rt = ip_route_connect(&fl4, lsa->l2tp_addr.s_addr, saddr,
+	fl4 = &inet->cork.fl.u.ip4;
+	rt = ip_route_connect(fl4, lsa->l2tp_addr.s_addr, saddr,
 			      RT_CONN_FLAGS(sk), oif,
 			      IPPROTO_L2TP,
 			      0, 0, sk, true);
@@ -342,10 +343,10 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
 
 	if (!inet->inet_saddr)
-		inet->inet_saddr = fl4.saddr;
+		inet->inet_saddr = fl4->saddr;
 	if (!inet->inet_rcv_saddr)
-		inet->inet_rcv_saddr = fl4.saddr;
-	inet->inet_daddr = fl4.daddr;
+		inet->inet_rcv_saddr = fl4->saddr;
+	inet->inet_daddr = fl4->daddr;
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
@@ -420,6 +421,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 	struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
 	struct rtable *rt = NULL;
+	struct flowi4 *fl4;
 	int connected = 0;
 	__be32 daddr;
 
@@ -474,12 +476,12 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 		goto error;
 	}
 
+	fl4 = &inet->cork.fl.u.ip4;
 	if (connected)
 		rt = (struct rtable *) __sk_dst_check(sk, 0);
 
 	if (rt == NULL) {
 		struct ip_options_rcu *inet_opt;
-		struct flowi4 fl4;
 
 		rcu_read_lock();
 		inet_opt = rcu_dereference(inet->inet_opt);
@@ -494,7 +496,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 		 * keep trying until route appears or the connection times
 		 * itself out.
 		 */
-		rt = ip_route_output_ports(sock_net(sk), &fl4, sk,
+		rt = ip_route_output_ports(sock_net(sk), fl4, sk,
 					   daddr, inet->inet_saddr,
 					   inet->inet_dport, inet->inet_sport,
 					   sk->sk_protocol, RT_CONN_FLAGS(sk),
-- 
1.7.5.1


^ permalink raw reply related

* [PATCH 4/15] l2tp: Fix locking in l2tp_core.c
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


l2tp_xmit_skb() must take the socket lock.  It makes use of ip_queue_xmit()
which expects to execute in a socket atomic context.

Since we execute this function in software interrupts, we cannot use the
usual lock_sock()/release_sock() sequence, instead we have to use
bh_lock_sock() and see if a user has the socket locked, and if so drop
the packet.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index c64ce0a..7853029 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1060,6 +1060,12 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 			      IPSKB_REROUTED);
 	nf_reset(skb);
 
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		dev_kfree_skb(skb);
+		goto out_unlock;
+	}
+
 	/* Get routing info from the tunnel socket */
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
@@ -1106,6 +1112,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	l2tp_skb_set_owner_w(skb, sk);
 
 	l2tp_xmit_core(session, skb, data_len);
+out_unlock:
+	bh_unlock_sock(sk);
 
 abort:
 	return 0;
-- 
1.7.5.1


^ permalink raw reply related

* [PATCH 3/15] l2tp: Fix locking in l2tp_ip.c
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


Both l2tp_ip_connect() and l2tp_ip_sendmsg() must take the socket
lock.  They both modify socket state non-atomically, and in particular
l2tp_ip_sendmsg() increments socket private counters without using
atomic operations.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c |   19 +++++++++++++++----
 1 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 8189960..bd0cc0b 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -311,6 +311,8 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	if (lsa->l2tp_family != AF_INET)
 		goto out;
 
+	lock_sock(sk);
+
 	sk_dst_reset(sk);
 
 	oif = sk->sk_bound_dev_if;
@@ -356,6 +358,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 
 	rc = 0;
 out:
+	release_sock(sk);
 	return rc;
 }
 
@@ -420,18 +423,23 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 	int connected = 0;
 	__be32 daddr;
 
+	lock_sock(sk);
+
+	rc = -ENOTCONN;
 	if (sock_flag(sk, SOCK_DEAD))
-		return -ENOTCONN;
+		goto out;
 
 	/* Get and verify the address. */
 	if (msg->msg_name) {
 		struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name;
+		rc = -EINVAL;
 		if (msg->msg_namelen < sizeof(*lip))
-			return -EINVAL;
+			goto out;
 
 		if (lip->l2tp_family != AF_INET) {
+			rc = -EAFNOSUPPORT;
 			if (lip->l2tp_family != AF_UNSPEC)
-				return -EAFNOSUPPORT;
+				goto out;
 		}
 
 		daddr = lip->l2tp_addr.s_addr;
@@ -510,12 +518,15 @@ error:
 		lsa->tx_errors++;
 	}
 
+out:
+	release_sock(sk);
 	return rc;
 
 no_route:
 	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
-	return -EHOSTUNREACH;
+	rc = -EHOSTUNREACH;
+	goto out;
 }
 
 static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-- 
1.7.5.1


^ permalink raw reply related

* [PATCH 2/15] tcp: Use cork flow in tcp_v4_connect()
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


Since this is invoked from inet_stream_connect() the socket is locked
and therefore this usage is safe.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c |   13 +++++++------
 1 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f3d16d8..a712171 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -151,7 +151,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct tcp_sock *tp = tcp_sk(sk);
 	__be16 orig_sport, orig_dport;
 	__be32 daddr, nexthop;
-	struct flowi4 fl4;
+	struct flowi4 *fl4;
 	struct rtable *rt;
 	int err;
 	struct ip_options_rcu *inet_opt;
@@ -173,7 +173,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	orig_sport = inet->inet_sport;
 	orig_dport = usin->sin_port;
-	rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr,
+	fl4 = &inet->cork.fl.u.ip4;
+	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 			      IPPROTO_TCP,
 			      orig_sport, orig_dport, sk, true);
@@ -190,10 +191,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	if (!inet_opt || !inet_opt->opt.srr)
-		daddr = fl4.daddr;
+		daddr = fl4->daddr;
 
 	if (!inet->inet_saddr)
-		inet->inet_saddr = fl4.saddr;
+		inet->inet_saddr = fl4->saddr;
 	inet->inet_rcv_saddr = inet->inet_saddr;
 
 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
@@ -204,7 +205,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	if (tcp_death_row.sysctl_tw_recycle &&
-	    !tp->rx_opt.ts_recent_stamp && fl4.daddr == daddr) {
+	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
 		struct inet_peer *peer = rt_get_peer(rt);
 		/*
 		 * VJ's idea. We save last timestamp seen from
@@ -240,7 +241,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		goto failure;
 
-	rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport,
+	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 			       inet->inet_sport, inet->inet_dport, sk);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
-- 
1.7.5.1


^ permalink raw reply related

* [PATCH 1/15] dccp: Use cork flow in dccp_v4_connect()
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


Since this is invoked from inet_stream_connect() the socket is locked
and therefore this usage is safe.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c |   11 ++++++-----
 1 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 36700a4..4ac1a72 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -45,7 +45,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct dccp_sock *dp = dccp_sk(sk);
 	__be16 orig_sport, orig_dport;
 	__be32 daddr, nexthop;
-	struct flowi4 fl4;
+	struct flowi4 *fl4;
 	struct rtable *rt;
 	int err;
 	struct ip_options_rcu *inet_opt;
@@ -70,7 +70,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	orig_sport = inet->inet_sport;
 	orig_dport = usin->sin_port;
-	rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr,
+	fl4 = &inet->cork.fl.u.ip4;
+	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
 			      IPPROTO_DCCP,
 			      orig_sport, orig_dport, sk, true);
@@ -83,10 +84,10 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	if (inet_opt == NULL || !inet_opt->opt.srr)
-		daddr = fl4.daddr;
+		daddr = fl4->daddr;
 
 	if (inet->inet_saddr == 0)
-		inet->inet_saddr = fl4.saddr;
+		inet->inet_saddr = fl4->saddr;
 	inet->inet_rcv_saddr = inet->inet_saddr;
 
 	inet->inet_dport = usin->sin_port;
@@ -106,7 +107,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (err != 0)
 		goto failure;
 
-	rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport,
+	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 			       inet->inet_sport, inet->inet_dport, sk);
 	if (IS_ERR(rt)) {
 		rt = NULL;
-- 
1.7.5.1


^ permalink raw reply related

* [PATCH 0/15] Pass keys down to ip_queue_xmit()
From: David Miller @ 2011-05-08 22:37 UTC (permalink / raw)
  To: netdev


The final goal of this set of changes is to normalize all uses
of ip_queue_xmit() and how sockets are connected up so that
ip_queue_xmit() can elide its usage of rt->rt_{src,dst}.

Along the way I found some locking problems in L2TP fixed here.

Basically we reuse inet_sk->cork.fl to store keying information
and pass that (or something more appropriate) to ip_queue_xmit().

This opens up the door for SCTP to express exactly what it does, by
passing in a flow which is now added to the individual transport
instances.

Taking this one step further, we could add a "struct dst **dst"
argument to ->queue_xmit() and friends, then SCTP could pass in
"&transport->dst" and it would no longer need to do that special
"pre-hookup the DST to skb" ugliness.

L2TP could be modified similarly.

Then the pre-routed SKB code can be removed completely.

Signed-off-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply

* Re: Scalability of interface creation and deletion
From: Eric Dumazet @ 2011-05-08 21:00 UTC (permalink / raw)
  To: paulmck; +Cc: Alex Bligh, netdev, Jesse Gross
In-Reply-To: <20110508154854.GT2641@linux.vnet.ibm.com>

Le dimanche 08 mai 2011 à 08:48 -0700, Paul E. McKenney a écrit :
> On Sun, May 08, 2011 at 04:17:42PM +0100, Alex Bligh wrote:
> > 
> > If 6 jiffies per call to ensure cpus are idle is a fact of life,
> > then the question goes back to why interface removal is waiting
> > for rcu readers to be released synchronously, as opposed to
> > doing the update bits synchronously, then doing the reclaim
> > element (freeing the memory) afterwards using call_rcu.
> 
> This would speed things up considerably, assuming that there is no
> other reason to block for an RCU grace period.
> 

That's not so simple... Things are modular, and it's better to be safe than
to crash on a very rare event (device dismantles are not something we expect
to do very often. Only special setups might need to perform hundreds of them
per minute...)

For example, in the VLAN dismantle phase (ip link del eth0.103)
we have 3 calls to synchronize_rcu() and one call to rcu_barrier()

[ the 'extra' synchronize_rcu() call comes from unregister_vlan_dev() ]

Maybe with new VLAN model, we could now remove this synchronize_net()
call from vlan code. Jesse what do you think ?
Once vlan_group_set_device(grp, vlan_id, NULL) has been called, why
should we respect one rcu grace period at all, given dev is queued to
unregister_netdevice_queue() [ which has its own couple of
synchronize_net() / rcu_barrier() calls ] ?


The real scalability problem of device dismantles comes from the fact
that all these waits are done under RTNL mutex. This is the real killer
because you cannot use your eight cpus, even if you are willing to.

We can probably speed things, but we should consider the following user
actions :

ip link add link eth0 vlan103 type vlan id 103
ip link del vlan103
ip link add link eth1 vlan103 type vlan id 103

The "link del" command should return to the user only once the minimum
necessary work has been done, to make sure the following "link add" won't
fail mysteriously.




^ permalink raw reply

* Bluetooth: l2cap and rfcomm: fix 1 byte infoleak to userspace.
From: Filip Palian @ 2011-05-08 19:57 UTC (permalink / raw)
  To: Marcel Holtmann, Gustavo F. Padovan, David S. Miller,
	linux-bluetooth-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: security-DgEjT+Ai2ygdnm+yROfE0A,
	oss-security-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8

Hi,

Structures "l2cap_conninfo" and "rfcomm_conninfo" have one padding
byte each. This byte in "cinfo" is copied to userspace uninitialized.

patch no.1:
-- cut --
--- a/net/bluetooth/l2cap_sock.c        2011-05-04 03:59:13.000000000 +0100
+++ b/net/bluetooth/l2cap_sock.c        2011-05-08 18:57:20.000000000 +0100
@@ -446,6 +446,7 @@ static int l2cap_sock_getsockopt_old(str
                        break;
                }

+               memset(&cinfo, 0, sizeof(cinfo));
                cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle;
                memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3);

-- cut --

patch no.2:
-- cut --
--- a/net/bluetooth/rfcomm/sock.c       2011-05-04 03:59:13.000000000 +0100
+++ b/net/bluetooth/rfcomm/sock.c       2011-05-08 19:00:24.000000000 +0100
@@ -787,6 +787,7 @@ static int rfcomm_sock_getsockopt_old(st

                l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk;

+               memset(&cinfo, 0, sizeof(cinfo));
                cinfo.hci_handle = l2cap_pi(l2cap_sk)->conn->hcon->handle;
                memcpy(cinfo.dev_class,
l2cap_pi(l2cap_sk)->conn->hcon->dev_class, 3);
-- cut --

Found by Marek Kroemeke and Filip Palian. Special thanks to Vasiliy
Kulikov for verifying this bug.


Best regards.

^ permalink raw reply

* [PATCH] pci, e1000e: Add and use __pci_disable_link_state
From: Yinghai Lu @ 2011-05-08 18:54 UTC (permalink / raw)
  To: Jeff Kirsher, Jesse Brandeburg, Bruce Allan, Carolyn Wyborny,
	Don 
  Cc: e1000-devel, netdev, Andrew Morton, linux-kernel, linux-pci


Need to use it in __e1000e_disable_aspm.

Found lock up:

[ 2374.654557] kworker/32:1    D ffff881027f6b0f0     0  6075      2 0x00000000
[ 2374.654816]  ffff88503f099a68 0000000000000046 ffff88503f098000 0000000000004000
[ 2374.654837]  00000000001d1ec0 ffff88503f099fd8 00000000001d1ec0 ffff88503f099fd8
[ 2374.654860]  0000000000004000 00000000001d1ec0 ffff88503dcc8000 ffff88503f090000
[ 2374.654880] Call Trace:
[ 2374.654898]  [<ffffffff810b1302>] ? __lock_acquired+0x3a/0x224
[ 2374.654914]  [<ffffffff81c2b59c>] ? _raw_spin_unlock_irq+0x30/0x36
[ 2374.654925]  [<ffffffff810b069d>] ? trace_hardirqs_on_caller+0x1f/0x178
[ 2374.654936]  [<ffffffff81c2ab24>] rwsem_down_failed_common+0xd3/0x103
[ 2374.654945]  [<ffffffff810b158f>] ? __lock_contended+0x3a/0x2a2
[ 2374.654955]  [<ffffffff81c2ab7b>] rwsem_down_read_failed+0x12/0x14
[ 2374.654967]  [<ffffffff813371e4>] call_rwsem_down_read_failed+0x14/0x30
[ 2374.654981]  [<ffffffff8135df20>] ? pci_disable_link_state+0x5f/0xf5
[ 2374.654990]  [<ffffffff81c2a0e6>] ? down_read+0x7e/0x91
[ 2374.654999]  [<ffffffff8135df20>] ? pci_disable_link_state+0x5f/0xf5
[ 2374.655008]  [<ffffffff8135df20>] pci_disable_link_state+0x5f/0xf5
[ 2374.655024]  [<ffffffff81661796>] e1000e_disable_aspm+0x55/0x5a
[ 2374.655037]  [<ffffffff816677eb>] e1000_io_slot_reset+0x59/0xea
[ 2374.655048]  [<ffffffff8135fe0d>] ? report_mmio_enabled+0x5d/0x5d
[ 2374.655057]  [<ffffffff8135fe3b>] report_slot_reset+0x2e/0x5d
[ 2374.655072]  [<ffffffff8135369e>] pci_walk_bus+0x8a/0xb7
[ 2374.655081]  [<ffffffff8135fe0d>] ? report_mmio_enabled+0x5d/0x5d
[ 2374.655091]  [<ffffffff813603be>] broadcast_error_message+0xa4/0xb2
[ 2374.655101]  [<ffffffff81352c71>] ? pci_bus_read_config_dword+0x72/0x80
[ 2374.655110]  [<ffffffff813606df>] do_recovery+0x9e/0xf9
[ 2374.655120]  [<ffffffff81360786>] handle_error_source+0x4c/0x51
[ 2374.655129]  [<ffffffff81360974>] aer_isr_one_error+0x1e9/0x21a
[ 2374.655138]  [<ffffffff81360a6c>] aer_isr+0xc7/0xcc
[ 2374.655147]  [<ffffffff813609a5>] ? aer_isr_one_error+0x21a/0x21a
[ 2374.655159]  [<ffffffff81096d9f>] process_one_work+0x237/0x3ec
[ 2374.655168]  [<ffffffff81096d10>] ? process_one_work+0x1a8/0x3ec
[ 2374.655178]  [<ffffffff8109728d>] worker_thread+0x17c/0x240
[ 2374.655186]  [<ffffffff810b0803>] ? trace_hardirqs_on+0xd/0xf
[ 2374.655196]  [<ffffffff81097111>] ? manage_workers+0xab/0xab
[ 2374.655209]  [<ffffffff8109c8ed>] kthread+0xa0/0xa8
[ 2374.655223]  [<ffffffff81c332d4>] kernel_thread_helper+0x4/0x10
[ 2374.655232]  [<ffffffff81c2b880>] ? retint_restore_args+0xe/0xe
[ 2374.655243]  [<ffffffff8109c84d>] ? __init_kthread_worker+0x5b/0x5b
[ 2374.655252]  [<ffffffff81c332d0>] ? gs_change+0xb/0xb

When AER happens,
pci_walk_bus has already done down_read(&pci_bus_sem)...
then report_slot_reset
        ==> e1000_io_slot_reset
                ==> e1000e_disable_aspm
                        ==> pci_disable_link_state...

We cannot use pci_disable_link_state there, because it will try to take pci_bus_sem again.

Add __pci_disable_link_state, which does not take pci_bus_sem.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 drivers/net/e1000e/netdev.c |    2 +-
 drivers/pci/pcie/aspm.c     |   16 +++++++++++++---
 include/linux/pci-aspm.h    |    1 +
 3 files changed, 15 insertions(+), 4 deletions(-)

Index: linux-2.6/drivers/net/e1000e/netdev.c
===================================================================
--- linux-2.6.orig/drivers/net/e1000e/netdev.c
+++ linux-2.6/drivers/net/e1000e/netdev.c
@@ -5360,7 +5360,7 @@ static void e1000_complete_shutdown(stru
 #ifdef CONFIG_PCIEASPM
 static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
 {
-	pci_disable_link_state(pdev, state);
+	__pci_disable_link_state(pdev, state);
 }
 #else
 static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
Index: linux-2.6/drivers/pci/pcie/aspm.c
===================================================================
--- linux-2.6.orig/drivers/pci/pcie/aspm.c
+++ linux-2.6/drivers/pci/pcie/aspm.c
@@ -734,7 +734,7 @@ void pcie_aspm_powersave_config_link(str
  * pci_disable_link_state - disable pci device's link state, so the link will
  * never enter specific states
  */
-void pci_disable_link_state(struct pci_dev *pdev, int state)
+static void ___pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
 {
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link;
@@ -747,7 +747,8 @@ void pci_disable_link_state(struct pci_d
 	if (!parent || !parent->link_state)
 		return;
 
-	down_read(&pci_bus_sem);
+	if (sem)
+		down_read(&pci_bus_sem);
 	mutex_lock(&aspm_lock);
 	link = parent->link_state;
 	if (state & PCIE_LINK_STATE_L0S)
@@ -761,7 +762,16 @@ void pci_disable_link_state(struct pci_d
 		pcie_set_clkpm(link, 0);
 	}
 	mutex_unlock(&aspm_lock);
-	up_read(&pci_bus_sem);
+	if (sem)
+		up_read(&pci_bus_sem);
+}
+void __pci_disable_link_state(struct pci_dev *pdev, int state)
+{
+	___pci_disable_link_state(pdev, state, false);
+}
+void pci_disable_link_state(struct pci_dev *pdev, int state)
+{
+	___pci_disable_link_state(pdev, state, true);
 }
 EXPORT_SYMBOL(pci_disable_link_state);
 
Index: linux-2.6/include/linux/pci-aspm.h
===================================================================
--- linux-2.6.orig/include/linux/pci-aspm.h
+++ linux-2.6/include/linux/pci-aspm.h
@@ -28,6 +28,7 @@ extern void pcie_aspm_exit_link_state(st
 extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
 extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
 extern void pci_disable_link_state(struct pci_dev *pdev, int state);
+extern void __pci_disable_link_state(struct pci_dev *pdev, int state);
 extern void pcie_clear_aspm(void);
 extern void pcie_no_aspm(void);
 #else

------------------------------------------------------------------------------
WhatsUp Gold - Download Free Network Management Software
The most intuitive, comprehensive, and cost-effective network 
management toolset available today.  Delivers lowest initial 
acquisition cost and overall TCO of any competing solution.
http://p.sf.net/sfu/whatsupgold-sd
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel® Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* Re: [PATCH] NET: slip, fix ldisc->open retval
From: Alan Cox @ 2011-05-08 18:25 UTC (permalink / raw)
  To: Oliver Hartkopp; +Cc: matvejchikov, Jeff Dike, Linux Netdev List
In-Reply-To: <4DC6D4CF.2080006@hartkopp.net>

> Looks reasonable to me.

Ditto
> 
> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>

> Alan?

Acked-by: Alan Cox <alan@linux.intel.com>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox