* [PATCH 0/5] net: sysctl: share ipv4/ipv6 sysctl tables
From: Lucian Adrian Grijincu @ 2011-02-04 4:37 UTC (permalink / raw)
To: linux-kernel, netdev, Eric W. Biederman, Eric Dumazet,
David S. Miller, Oct
Cc: Lucian Adrian Grijincu
Each network device gets the same 25/24 sysctl entries for ipv4/ipv6
in /proc/sys/net/ipv4/conf/DEVNAME and /proc/sys/net/ipv6/conf/DEVNAME
Unfortunately, space is wasted holding nearly identical data.
Fortunately, with some tricks these entries can be shared between all
network devices.
The single entry in 'struct ctl_table' that was modified at runtime
for leaf ctl_table nodes and prevented sharing was 'parent'. This
field was first introduced for selinux and then was used to implement
sysctl_check_table. Selinux recently removed the need for this field:
* http://thread.gmane.org/gmane.linux.kernel.lsm/12623
* LKML-Reference: 1296519474-15714-1-git-send-email-lucian.grijincu@gmail.com
Remove the need for 'parent' in sysctl_check_table and remove the
'parent' field:
[PATCH 1/5] sysctl: faster reimplementation of sysctl_check_table
[PATCH 2/5] sysctl: remove useless ctl_table->parent field
Pave the way for sharing of ipv4/6 tables: allow data to be stored in
the nodes above the leaves that will be shared:
[PATCH 3/5] sysctl: write ctl_table->extra2 to entries created from ctl_path
Finally share the leaf sysctl tables for ipv4/ipv6:
[PATCH 4/5] ipv4: share sysctl net/ipv4/conf/DEVNAME/ tables
[PATCH 5/5] ipv6: share sysctl net/ipv6/conf/DEVNAME/ tables
fs/proc/proc_sysctl.c | 16 +++-
include/linux/inetdevice.h | 12 +++-
include/linux/ipv6.h | 15 +++-
include/linux/sysctl.h | 3 +-
include/net/net_namespace.h | 2 +
kernel/sysctl.c | 18 +---
kernel/sysctl_check.c | 125 +++++++++++++--------------
net/ipv4/devinet.c | 203 ++++++++++++++++++++++++++++--------------
net/ipv6/addrconf.c | 192 +++++++++++++++++++++++++++-------------
net/sysctl_net.c | 20 +++--
10 files changed, 387 insertions(+), 219 deletions(-)
--
1.7.4.rc1.7.g2cf08.dirty
^ permalink raw reply
* [PATCH 1/5] sysctl: faster reimplementation of sysctl_check_table
From: Lucian Adrian Grijincu @ 2011-02-04 4:37 UTC (permalink / raw)
To: linux-kernel, netdev, Eric W. Biederman, Eric Dumazet,
David S. Miller, Oct
Cc: Lucian Adrian Grijincu
In-Reply-To: <cover.1296793770.git.lucian.grijincu@gmail.com>
Determining the parent of a node at depth d
- previous implementation: O(d)
- current implementation: O(1)
Printing the path to a node at depth d
- previous implementation: O(d^2)
- current implementation: O(d)
This comes at a cost: we use an array ('parents') holding as many
pointers as there can be sysctl levels (currently CTL_MAXNAME=10).
The 'parents' array of pointers holds the same values as the
ctl_table->parent field because the function that updates ->parent
(sysctl_set_parent) is called with either NULL (for root nodes) or
with sysctl_set_parent(table, table->child).
Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
kernel/sysctl_check.c | 121 ++++++++++++++++++++++++-------------------------
1 files changed, 60 insertions(+), 61 deletions(-)
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 10b90d8..9b4fecd 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -6,58 +6,34 @@
#include <net/ip_vs.h>
-static int sysctl_depth(struct ctl_table *table)
-{
- struct ctl_table *tmp;
- int depth;
-
- depth = 0;
- for (tmp = table; tmp->parent; tmp = tmp->parent)
- depth++;
-
- return depth;
-}
-
-static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
+static void sysctl_print_path(struct ctl_table *table,
+ struct ctl_table **parents, int depth)
{
+ struct ctl_table *p;
int i;
-
- for (i = 0; table && i < n; i++)
- table = table->parent;
-
- return table;
-}
-
-
-static void sysctl_print_path(struct ctl_table *table)
-{
- struct ctl_table *tmp;
- int depth, i;
- depth = sysctl_depth(table);
if (table->procname) {
- for (i = depth; i >= 0; i--) {
- tmp = sysctl_parent(table, i);
- printk("/%s", tmp->procname?tmp->procname:"");
+ for (i = 0; i < depth; i++) {
+ p = parents[i];
+ printk("/%s", p->procname ? p->procname : "");
}
+ printk("/%s", table->procname);
}
printk(" ");
}
static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
- struct ctl_table *table)
+ struct ctl_table *table, struct ctl_table **parents, int depth)
{
struct ctl_table_header *head;
struct ctl_table *ref, *test;
- int depth, cur_depth;
-
- depth = sysctl_depth(table);
+ int cur_depth;
for (head = __sysctl_head_next(namespaces, NULL); head;
head = __sysctl_head_next(namespaces, head)) {
cur_depth = depth;
ref = head->ctl_table;
repeat:
- test = sysctl_parent(table, cur_depth);
+ test = parents[depth - cur_depth];
for (; ref->procname; ref++) {
int match = 0;
if (cur_depth && !ref->child)
@@ -83,11 +59,12 @@ out:
return ref;
}
-static void set_fail(const char **fail, struct ctl_table *table, const char *str)
+static void set_fail(const char **fail, struct ctl_table *table,
+ const char *str, struct ctl_table **parents, int depth)
{
if (*fail) {
printk(KERN_ERR "sysctl table check failed: ");
- sysctl_print_path(table);
+ sysctl_print_path(table, parents, depth);
printk(" %s\n", *fail);
dump_stack();
}
@@ -95,16 +72,24 @@ static void set_fail(const char **fail, struct ctl_table *table, const char *str
}
static void sysctl_check_leaf(struct nsproxy *namespaces,
- struct ctl_table *table, const char **fail)
+ struct ctl_table *table, const char **fail,
+ struct ctl_table **parents, int depth)
{
struct ctl_table *ref;
- ref = sysctl_check_lookup(namespaces, table);
- if (ref && (ref != table))
- set_fail(fail, table, "Sysctl already exists");
+ ref = sysctl_check_lookup(namespaces, table, parents, depth);
+ if (ref && (ref != table)) {
+ printk(KERN_ALERT "sysctl_check_leaf ref[%s], table[%s]\n", ref->procname, table->procname);
+ set_fail(fail, table, "Sysctl already exists", parents, depth);
+ }
}
-int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
+
+
+#define SET_FAIL(str) set_fail(&fail, table, str, parents, depth)
+
+static int __sysctl_check_table(struct nsproxy *namespaces,
+ struct ctl_table *table, struct ctl_table **parents, int depth)
{
int error = 0;
for (; table->procname; table++) {
@@ -112,23 +97,23 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
if (table->parent) {
if (table->procname && !table->parent->procname)
- set_fail(&fail, table, "Parent without procname");
+ SET_FAIL("Parent without procname");
}
if (!table->procname)
- set_fail(&fail, table, "No procname");
+ SET_FAIL("No procname");
if (table->child) {
if (table->data)
- set_fail(&fail, table, "Directory with data?");
+ SET_FAIL("Directory with data?");
if (table->maxlen)
- set_fail(&fail, table, "Directory with maxlen?");
+ SET_FAIL("Directory with maxlen?");
if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
- set_fail(&fail, table, "Writable sysctl directory");
+ SET_FAIL("Writable sysctl directory");
if (table->proc_handler)
- set_fail(&fail, table, "Directory with proc_handler");
+ SET_FAIL("Directory with proc_handler");
if (table->extra1)
- set_fail(&fail, table, "Directory with extra1");
+ SET_FAIL("Directory with extra1");
if (table->extra2)
- set_fail(&fail, table, "Directory with extra2");
+ SET_FAIL("Directory with extra2");
} else {
if ((table->proc_handler == proc_dostring) ||
(table->proc_handler == proc_dointvec) ||
@@ -139,28 +124,42 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
(table->proc_handler == proc_doulongvec_minmax) ||
(table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
if (!table->data)
- set_fail(&fail, table, "No data");
+ SET_FAIL("No data");
if (!table->maxlen)
- set_fail(&fail, table, "No maxlen");
+ SET_FAIL("No maxlen");
}
#ifdef CONFIG_PROC_SYSCTL
if (table->procname && !table->proc_handler)
- set_fail(&fail, table, "No proc_handler");
-#endif
-#if 0
- if (!table->procname && table->proc_handler)
- set_fail(&fail, table, "proc_handler without procname");
+ SET_FAIL("No proc_handler");
#endif
- sysctl_check_leaf(namespaces, table, &fail);
+ parents[depth] = table;
+ sysctl_check_leaf(namespaces, table, &fail,
+ parents, depth);
}
if (table->mode > 0777)
- set_fail(&fail, table, "bogus .mode");
+ SET_FAIL("bogus .mode");
if (fail) {
- set_fail(&fail, table, NULL);
+ SET_FAIL(NULL);
error = -EINVAL;
}
- if (table->child)
- error |= sysctl_check_table(namespaces, table->child);
+ if (table->child) {
+ parents[depth] = table;
+ error |= __sysctl_check_table(namespaces, table->child,
+ parents, depth + 1);
+ }
}
return error;
}
+
+
+int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
+{
+ struct ctl_table *parents[CTL_MAXNAME];
+ /* Keep track of parents as we go down into the tree.
+ *
+ * parents[i-1] will be the parent for parents[i].
+ * The node at depth 'd' will have the parent at parents[d-1].
+ * The root node (depth=0) has no parent in this array.
+ */
+ return __sysctl_check_table(namespaces, table, parents, 0);
+}
--
1.7.4.rc1.7.g2cf08.dirty
^ permalink raw reply related
* [PATCH 2/5] sysctl: remove useless ctl_table->parent field
From: Lucian Adrian Grijincu @ 2011-02-04 4:37 UTC (permalink / raw)
To: linux-kernel, netdev, Eric W. Biederman, Eric Dumazet,
David S. Miller, Oct
Cc: Lucian Adrian Grijincu
In-Reply-To: <cover.1296793770.git.lucian.grijincu@gmail.com>
The 'parent' field was added for selinux in:
commit d912b0cc1a617d7c590d57b7ea971d50c7f02503
[PATCH] sysctl: add a parent entry to ctl_table and set the parent entry
and then was used for sysctl_check_table.
Both of the users have found other implementations.
CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
include/linux/sysctl.h | 1 -
kernel/sysctl.c | 11 -----------
kernel/sysctl_check.c | 4 ++--
3 files changed, 2 insertions(+), 14 deletions(-)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7bb5cb6..1f1da4b 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -1018,7 +1018,6 @@ struct ctl_table
int maxlen;
mode_t mode;
struct ctl_table *child;
- struct ctl_table *parent; /* Automatically set */
proc_handler *proc_handler; /* Callback for text formatting */
void *extra1;
void *extra2;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 56f6fc1..42025ec 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1695,18 +1695,8 @@ int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
return test_perm(mode, op);
}
-static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
-{
- for (; table->procname; table++) {
- table->parent = parent;
- if (table->child)
- sysctl_set_parent(table, table->child);
- }
-}
-
static __init int sysctl_init(void)
{
- sysctl_set_parent(NULL, root_table);
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
sysctl_check_table(current->nsproxy, root_table);
#endif
@@ -1864,7 +1854,6 @@ struct ctl_table_header *__register_sysctl_paths(
header->used = 0;
header->unregistering = NULL;
header->root = root;
- sysctl_set_parent(NULL, header->ctl_table);
header->count = 1;
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
if (sysctl_check_table(namespaces, header->ctl_table)) {
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 9b4fecd..b7d9c66 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -95,8 +95,8 @@ static int __sysctl_check_table(struct nsproxy *namespaces,
for (; table->procname; table++) {
const char *fail = NULL;
- if (table->parent) {
- if (table->procname && !table->parent->procname)
+ if (depth != 0) { /* has parent */
+ if (table->procname && !parents[depth - 1]->procname)
SET_FAIL("Parent without procname");
}
if (!table->procname)
--
1.7.4.rc1.7.g2cf08.dirty
^ permalink raw reply related
* [PATCH 3/5] sysctl: write ctl_table->extra2 to entries created from ctl_path
From: Lucian Adrian Grijincu @ 2011-02-04 4:37 UTC (permalink / raw)
To: linux-kernel, netdev, Eric W. Biederman, Eric Dumazet,
David S. Miller, Oct
Cc: Lucian Adrian Grijincu
In-Reply-To: <cover.1296793770.git.lucian.grijincu@gmail.com>
For each entry in an array of 'struct ctl_path' we were registering a
'struct ctl_table' array with two entries:
- one to store the name + permissions,
- one as an end-of-array marker (completely blank).
We were not using any of the data storage fields
(data, extra1, extra2) in the first 'struct ctl_table'.
This patch adds the possibility of storing a user-provided
pointer in the 'extra2' field.
All users of the following functions still store NULL in the 'extra2'
field, as they did before this patch:
* register_sysctl_paths
* register_net_sysctl_table
* register_net_sysctl_rotable
Until now sysctl_check_table considered that the 'struct ctl_table' of
directories may not store anything in the 'extra2' field. We no longer
consider this a fault.
Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
include/linux/sysctl.h | 2 +-
include/net/net_namespace.h | 2 ++
kernel/sysctl.c | 7 +++++--
kernel/sysctl_check.c | 2 --
net/sysctl_net.c | 20 ++++++++++++++------
5 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 1f1da4b..090b9a3 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -1057,7 +1057,7 @@ struct ctl_path {
void register_sysctl_root(struct ctl_table_root *root);
struct ctl_table_header *__register_sysctl_paths(
struct ctl_table_root *root, struct nsproxy *namespaces,
- const struct ctl_path *path, struct ctl_table *table);
+ const struct ctl_path *path, struct ctl_table *table, void *pathdata);
struct ctl_table_header *register_sysctl_table(struct ctl_table * table);
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
struct ctl_table *table);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1bf812b..42d4d61 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -272,6 +272,8 @@ struct ctl_table_header;
extern struct ctl_table_header *register_net_sysctl_table(struct net *net,
const struct ctl_path *path, struct ctl_table *table);
+struct ctl_table_header *register_net_sysctl_table_pathdata(struct net *net,
+ const struct ctl_path *path, struct ctl_table *table, void *pathdata);
extern struct ctl_table_header *register_net_sysctl_rotable(
const struct ctl_path *path, struct ctl_table *table);
extern void unregister_net_sysctl_table(struct ctl_table_header *header);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 42025ec..9b67c9e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1759,6 +1759,8 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
* @namespaces: Data to compute which lists of sysctl entries are visible
* @path: The path to the directory the sysctl table is in.
* @table: the top-level table structure
+ * @pathdata: user provided pointer to data that will be stored in ->extra2
+ * for every ctl_table node allocated for entries in @path
*
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
* array. A completely 0 filled entry terminates the table.
@@ -1809,7 +1811,7 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
struct ctl_table_header *__register_sysctl_paths(
struct ctl_table_root *root,
struct nsproxy *namespaces,
- const struct ctl_path *path, struct ctl_table *table)
+ const struct ctl_path *path, struct ctl_table *table, void *pathdata)
{
struct ctl_table_header *header;
struct ctl_table *new, **prevp;
@@ -1841,6 +1843,7 @@ struct ctl_table_header *__register_sysctl_paths(
/* Copy the procname */
new->procname = path->procname;
new->mode = 0555;
+ new->extra2 = pathdata;
*prevp = new;
prevp = &new->child;
@@ -1895,7 +1898,7 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
struct ctl_table *table)
{
return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
- path, table);
+ path, table, NULL);
}
/**
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index b7d9c66..e09f47f 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -112,8 +112,6 @@ static int __sysctl_check_table(struct nsproxy *namespaces,
SET_FAIL("Directory with proc_handler");
if (table->extra1)
SET_FAIL("Directory with extra1");
- if (table->extra2)
- SET_FAIL("Directory with extra2");
} else {
if ((table->proc_handler == proc_dostring) ||
(table->proc_handler == proc_dointvec) ||
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index ca84212..9c92cac 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -103,22 +103,30 @@ out:
}
subsys_initcall(sysctl_init);
-struct ctl_table_header *register_net_sysctl_table(struct net *net,
- const struct ctl_path *path, struct ctl_table *table)
+struct ctl_table_header *register_net_sysctl_table_pathdata(struct net *net,
+ const struct ctl_path *path, struct ctl_table *table, void *pathdata)
{
struct nsproxy namespaces;
namespaces = *current->nsproxy;
namespaces.net_ns = net;
- return __register_sysctl_paths(&net_sysctl_root,
- &namespaces, path, table);
+ return __register_sysctl_paths(&net_sysctl_root, &namespaces,
+ path, table, pathdata);
+}
+EXPORT_SYMBOL_GPL(register_net_sysctl_table_pathdata);
+
+struct ctl_table_header *register_net_sysctl_table(struct net *net,
+ const struct ctl_path *path, struct ctl_table *table)
+{
+ return register_net_sysctl_table_pathdata(net, path, table, NULL);
}
EXPORT_SYMBOL_GPL(register_net_sysctl_table);
+
struct ctl_table_header *register_net_sysctl_rotable(const
struct ctl_path *path, struct ctl_table *table)
{
- return __register_sysctl_paths(&net_sysctl_ro_root,
- &init_nsproxy, path, table);
+ return __register_sysctl_paths(&net_sysctl_ro_root, &init_nsproxy,
+ path, table, NULL);
}
EXPORT_SYMBOL_GPL(register_net_sysctl_rotable);
--
1.7.4.rc1.7.g2cf08.dirty
^ permalink raw reply related
* [PATCH 4/5] ipv4: share sysctl net/ipv4/conf/DEVNAME/ tables
From: Lucian Adrian Grijincu @ 2011-02-04 4:37 UTC (permalink / raw)
To: linux-kernel, netdev, Eric W. Biederman, Eric Dumazet,
David S. Miller, Oct
Cc: Lucian Adrian Grijincu
In-Reply-To: <cover.1296793770.git.lucian.grijincu@gmail.com>
Before this, for each network device DEVNAME that supports ipv4 a new
sysctl table was registered in $PROC/sys/net/ipv4/conf/DEVNAME/.
The sysctl table was identical for all network devices, except for:
* data: pointer to the data to be accessed in the sysctl
* extra1: the 'struct ipv4_devconf*' of the network device
* extra2: the 'struct net*' of the network namespace
Assuming we have a device name and a 'struct net*', we can get the
'struct net_device*'. From there we can compute:
* data: each entry corresponds to a position in 'struct ipv4_devconf*'
* extra1: 'struct ipv4_devconf*' can be reached from 'struct net_device*'
* extra2: the 'struct net*' that we assumed to have
The device name is determined from the path to the file (the name of
the parent dentry).
The 'struct net*' is stored in the parent 'struct ctl_table*' path by
register_net_sysctl_table_pathdata().
Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
fs/proc/proc_sysctl.c | 16 +++-
include/linux/inetdevice.h | 12 +++-
net/ipv4/devinet.c | 203 +++++++++++++++++++++++++++++---------------
3 files changed, 161 insertions(+), 70 deletions(-)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index fb707e0..fe392f1 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -128,6 +128,11 @@ out:
return err;
}
+
+typedef int proc_handler_extended(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos,
+ struct file *filp);
+
static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
size_t count, loff_t *ppos, int write)
{
@@ -136,6 +141,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
ssize_t error;
size_t res;
+ proc_handler_extended *phx = (proc_handler_extended *) table->proc_handler;
if (IS_ERR(head))
return PTR_ERR(head);
@@ -155,7 +161,15 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
/* careful: calling conventions are nasty here */
res = count;
- error = table->proc_handler(table, write, buf, &res, ppos);
+ /* Most handlers only use the first 5 arguments (without @filp).
+ * Changing all is too much work, as, at the time of writing only
+ * the devinet.c proc_handlers know about and use the @filp.
+ *
+ * This is just a HACK for now, I did this this way to not
+ * waste time changing all the handlers, in the final version
+ * I'll change all the handlers if there's not other solution.
+ */
+ error = phx(table, write, buf, &res, ppos, filp);
if (!error)
error = res;
out:
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ae8fdc5..caf06b3 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -43,8 +43,18 @@ enum
#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1)
+
+struct devinet_sysctl {
+ /* dev_name holds a copy of dev_name, because '.procname' is
+ * regarded as const by sysctl and we wouldn't want anyone to
+ * change it under our feet (see SIOCSIFNAME). */
+ char *dev_name;
+ struct ctl_table_header *sysctl_header;
+};
+
+
struct ipv4_devconf {
- void *sysctl;
+ struct devinet_sysctl devinet_sysctl;
int data[IPV4_DEVCONF_MAX];
DECLARE_BITMAP(state, IPV4_DEVCONF_MAX);
};
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 748cb5b..774d347 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -147,7 +147,7 @@ void in_dev_finish_destroy(struct in_device *idev)
}
EXPORT_SYMBOL(in_dev_finish_destroy);
-static struct in_device *inetdev_init(struct net_device *dev)
+struct in_device *inetdev_init(struct net_device *dev)
{
struct in_device *in_dev;
@@ -158,7 +158,8 @@ static struct in_device *inetdev_init(struct net_device *dev)
goto out;
memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
sizeof(in_dev->cnf));
- in_dev->cnf.sysctl = NULL;
+ in_dev->cnf.devinet_sysctl.dev_name = NULL;
+ in_dev->cnf.devinet_sysctl.sysctl_header = NULL;
in_dev->dev = dev;
in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
if (!in_dev->arp_parms)
@@ -1375,6 +1376,67 @@ static void inet_forward_change(struct net *net)
}
}
+
+
+static int devinet_conf_handler(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ struct file *filp,
+ proc_handler *proc_handler)
+{
+ /* The path to this file is of the form:
+ * $PROC_MOUNT/sys/net/ipv4/conf/$DEVNAME/$CTL
+ *
+ * The array of 'struct ctl_table' of devinet entries is
+ * shared between all ipv4 network devices and the 'data'
+ * field of each structure only hold the offset into the
+ * 'data' field of 'struct ipv4_devconf'.
+ *
+ * To find the propper location of the data that must be
+ * accessed by this handler we need the device name and the
+ * network namespace in which it belongs.
+ */
+
+ /* We store the network namespace in the parent table's ->extra2 */
+ struct inode *parent_inode = filp->f_path.dentry->d_parent->d_inode;
+ struct ctl_table *parent_table = PROC_I(parent_inode)->sysctl_entry;
+ struct net *net = parent_table->extra2;
+
+ const char *dev_name = filp->f_path.dentry->d_parent->d_name.name;
+ struct ctl_table tmp_ctl;
+ struct net_device *dev = NULL;
+ struct in_device *in_dev = NULL;
+ struct ipv4_devconf *cnf;
+ int ret;
+
+ if (strcmp(dev_name, "all") == 0) {
+ cnf = net->ipv4.devconf_all;
+ } else if (strcmp(dev_name, "default") == 0) {
+ cnf = net->ipv4.devconf_dflt;
+ } else {
+ /* the device could have been renamed (SIOCSIFNAME) or
+ * deleted since we started accessing its proc sysctl */
+ dev = dev_get_by_name(net, dev_name);
+ if (dev == NULL)
+ return -ENOENT;
+ in_dev = in_dev_get(dev);
+ cnf = &in_dev->cnf;
+ }
+
+ tmp_ctl = *ctl;
+ tmp_ctl.data += (char *)cnf - (char *)&ipv4_devconf;
+ tmp_ctl.extra1 = cnf;
+ tmp_ctl.extra2 = net;
+
+ ret = proc_handler(&tmp_ctl, write, buffer, lenp, ppos);
+
+ if (in_dev)
+ in_dev_put(in_dev);
+ if (dev)
+ dev_put(dev);
+ return ret;
+}
+
static int devinet_conf_proc(ctl_table *ctl, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
@@ -1445,6 +1507,33 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
return ret;
}
+static int devinet_conf_proc__(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ struct file *filp)
+{
+ return devinet_conf_handler(ctl, write, buffer, lenp, ppos, filp,
+ devinet_conf_proc);
+}
+
+static int devinet_sysctl_forward__(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ struct file *filp)
+{
+ return devinet_conf_handler(ctl, write, buffer, lenp, ppos, filp,
+ devinet_sysctl_forward);
+}
+
+static int ipv4_doint_and_flush__(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ struct file *filp)
+{
+ return devinet_conf_handler(ctl, write, buffer, lenp, ppos, filp,
+ ipv4_doint_and_flush);
+}
+
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
{ \
.procname = name, \
@@ -1452,67 +1541,60 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
IPV4_DEVCONF_ ## attr - 1, \
.maxlen = sizeof(int), \
.mode = mval, \
- .proc_handler = proc, \
- .extra1 = &ipv4_devconf, \
+ .proc_handler = (proc_handler *) proc, \
}
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
- DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
+ DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc__)
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
- DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
+ DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc__)
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
- DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
-
-static struct devinet_sysctl_table {
- struct ctl_table_header *sysctl_header;
- struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
- char *dev_name;
-} devinet_sysctl = {
- .devinet_vars = {
- DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
- devinet_sysctl_forward),
- DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
-
- DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
- DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
- DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
- DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
- DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
- DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
- "accept_source_route"),
- DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
- DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
- DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
- DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
- DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
- DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
- DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
- DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
- DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
- DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
- DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
- DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
- DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
-
- DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
- DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
- DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
- "force_igmp_version"),
- DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
- "promote_secondaries"),
- },
+ DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush__)
+
+const struct ctl_table ipv4_devinet_sysctl_table[__IPV4_DEVCONF_MAX] = {
+ DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
+ devinet_sysctl_forward__),
+ DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
+
+ DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
+ DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
+ DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
+ DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
+ DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
+ DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
+ "accept_source_route"),
+ DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
+ DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
+ DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
+ DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
+ DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
+ DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
+ DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
+ DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
+ DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
+
+ DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
+ "force_igmp_version"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
+ "promote_secondaries"),
+ { }
};
static int __devinet_sysctl_register(struct net *net, char *dev_name,
- struct ipv4_devconf *p)
+ struct ipv4_devconf *cnf)
{
- int i;
- struct devinet_sysctl_table *t;
+ struct devinet_sysctl *t = &cnf->devinet_sysctl;
#define DEVINET_CTL_PATH_DEV 3
@@ -1524,16 +1606,6 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
{ },
};
- t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
- if (!t)
- goto out;
-
- for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
- t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
- t->devinet_vars[i].extra1 = p;
- t->devinet_vars[i].extra2 = net;
- }
-
/*
* Make a copy of dev_name, because '.procname' is regarded as const
* by sysctl and we wouldn't want anyone to change it under our feet
@@ -1541,37 +1613,32 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
*/
t->dev_name = kstrdup(dev_name, GFP_KERNEL);
if (!t->dev_name)
- goto free;
+ goto out;
devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
- t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
- t->devinet_vars);
+ t->sysctl_header = register_net_sysctl_table_pathdata(net,
+ devinet_ctl_path, ipv4_devinet_sysctl_table, net);
if (!t->sysctl_header)
goto free_procname;
- p->sysctl = t;
return 0;
free_procname:
kfree(t->dev_name);
-free:
- kfree(t);
out:
return -ENOBUFS;
}
static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
{
- struct devinet_sysctl_table *t = cnf->sysctl;
+ struct devinet_sysctl *t = &cnf->devinet_sysctl;
if (t == NULL)
return;
- cnf->sysctl = NULL;
unregister_sysctl_table(t->sysctl_header);
kfree(t->dev_name);
- kfree(t);
}
static void devinet_sysctl_register(struct in_device *idev)
--
1.7.4.rc1.7.g2cf08.dirty
^ permalink raw reply related
* [PATCH 5/5] ipv6: share sysctl net/ipv6/conf/DEVNAME/ tables
From: Lucian Adrian Grijincu @ 2011-02-04 4:37 UTC (permalink / raw)
To: linux-kernel, netdev, Eric W. Biederman, Eric Dumazet,
David S. Miller, Oct
Cc: Lucian Adrian Grijincu
In-Reply-To: <cover.1296793770.git.lucian.grijincu@gmail.com>
Similar to the ipv4 patch:
Before this, for each network device DEVNAME that supports ipv6 a new
sysctl table was registered in $PROC/sys/net/ipv6/conf/DEVNAME/.
The sysctl table was identical for all network devices, except for:
* data: pointer to the data to be accessed in the sysctl
* extra1: the 'struct inet6_dev*' of the network device
* extra2: the 'struct net*' of the network namespace
Assuming we have a device name and a 'struct net*', we can get the
'struct net_device*'. From there we can compute:
* data: each entry corresponds to a position in 'struct ipv6_devconf*'
* extra1: 'struct inet6_dev*' can be reached from 'struct net_device*'
* extra2: the 'struct net*' that we assume we have
The device name is determined from the path to the file (the name of
the parent dentry).
The 'struct net*' is stored in the parent 'struct ctl_table*' path by
register_net_sysctl_table_pathdata().
Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
---
include/linux/ipv6.h | 15 ++++-
net/ipv6/addrconf.c | 192 +++++++++++++++++++++++++++++++++----------------
2 files changed, 143 insertions(+), 64 deletions(-)
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0c99776..623761d 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -129,6 +129,17 @@ struct ipv6hdr {
};
#ifdef __KERNEL__
+
+#ifdef CONFIG_SYSCTL
+struct addrconf_sysctl {
+ /* dev_name holds a copy of dev_name, because '.procname' is
+ * regarded as const by sysctl and we wouldn't want anyone to
+ * change it under our feet (see SIOCSIFNAME). */
+ char *dev_name;
+ struct ctl_table_header *sysctl_header;
+};
+#endif
+
/*
* This structure contains configuration options per IPv6 link.
*/
@@ -172,7 +183,9 @@ struct ipv6_devconf {
__s32 disable_ipv6;
__s32 accept_dad;
__s32 force_tllao;
- void *sysctl;
+#ifdef CONFIG_SYSCTL
+ struct addrconf_sysctl addrconf_sysctl;
+#endif
};
struct ipv6_params {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fd6782e..27fd8a1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -364,7 +364,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
ndev->cnf.mtu6 = dev->mtu;
- ndev->cnf.sysctl = NULL;
+ ndev->cnf.addrconf_sysctl.dev_name = NULL;
+ ndev->cnf.addrconf_sysctl.sysctl_header = NULL;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
if (ndev->nd_parms == NULL) {
kfree(ndev);
@@ -4249,90 +4250,176 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write,
return ret;
}
-static struct addrconf_sysctl_table
+static int addrconf_handler(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ struct file *filp,
+ proc_handler *proc_handler)
{
- struct ctl_table_header *sysctl_header;
- ctl_table addrconf_vars[DEVCONF_MAX+1];
- char *dev_name;
-} addrconf_sysctl __read_mostly = {
- .sysctl_header = NULL,
- .addrconf_vars = {
+ /* The path to this file is of the form:
+ * $PROC_MOUNT/sys/net/ipv6/conf/$DEVNAME/$CTL
+ *
+ * The array of 'struct ctl_table' of addrconf entries is
+ * shared between all ipv6 network devices and the 'data'
+ * field of each structure only holds the offset into the
+ * 'data' field of 'struct ipv6_devconf'.
+ *
+ * To find the proper location of the data that must be
+ * accessed by this handler we need the device name and the
+ * network namespace in which it belongs.
+ */
+
+ /* We store the network namespace in the parent table's ->extra2 */
+ struct inode *parent_inode = filp->f_path.dentry->d_parent->d_inode;
+ struct ctl_table *parent_table = PROC_I(parent_inode)->sysctl_entry;
+ struct net *net = parent_table->extra2;
+
+ const char *dev_name = filp->f_path.dentry->d_parent->d_name.name;
+ struct ctl_table tmp_ctl;
+ struct net_device *dev = NULL;
+ struct inet6_dev *in6_dev = NULL;
+ struct ipv6_devconf *cnf;
+ int ret;
+
+ if (strcmp(dev_name, "all") == 0) {
+ cnf = net->ipv6.devconf_all;
+ } else if (strcmp(dev_name, "default") == 0) {
+ cnf = net->ipv6.devconf_dflt;
+ } else {
+ /* the device could have been renamed (SIOCSIFNAME) or
+ * deleted since we started accessing its proc sysctl */
+ dev = dev_get_by_name(net, dev_name);
+ if (dev == NULL)
+ return -ENOENT;
+ in6_dev = in6_dev_get(dev);
+ cnf = &in6_dev->cnf;
+ }
+
+ tmp_ctl = *ctl;
+ tmp_ctl.data += (char *)cnf - (char *)&ipv6_devconf;
+ tmp_ctl.extra1 = in6_dev;
+ tmp_ctl.extra2 = net;
+
+ ret = proc_handler(&tmp_ctl, write, buffer, lenp, ppos);
+
+ if (in6_dev)
+ in6_dev_put(in6_dev);
+ if (dev)
+ dev_put(dev);
+ return ret;
+}
+
+
+static int addrconf_proc_dointvec(ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos, struct file *filp)
+{
+ return addrconf_handler(ctl, write, buffer, lenp, ppos, filp,
+ proc_dointvec);
+}
+
+static int addrconf_proc_dointvec_jiffies(ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos, struct file *filp)
+{
+ return addrconf_handler(ctl, write, buffer, lenp, ppos, filp,
+ proc_dointvec_jiffies);
+}
+
+static int addrconf_sysctl_forward__(ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos, struct file *filp)
+{
+ return addrconf_handler(ctl, write, buffer, lenp, ppos, filp,
+ addrconf_sysctl_forward);
+}
+
+
+static int addrconf_sysctl_disable__(ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos, struct file *filp)
+{
+ return addrconf_handler(ctl, write, buffer, lenp, ppos, filp,
+ addrconf_sysctl_disable);
+}
+
+static const struct ctl_table ipv6_addrconf_sysctl_table[DEVCONF_MAX+1] = {
{
.procname = "forwarding",
.data = &ipv6_devconf.forwarding,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = addrconf_sysctl_forward,
+ .proc_handler = addrconf_sysctl_forward__,
},
{
.procname = "hop_limit",
.data = &ipv6_devconf.hop_limit,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "mtu",
.data = &ipv6_devconf.mtu6,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "accept_ra",
.data = &ipv6_devconf.accept_ra,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "accept_redirects",
.data = &ipv6_devconf.accept_redirects,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "autoconf",
.data = &ipv6_devconf.autoconf,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "dad_transmits",
.data = &ipv6_devconf.dad_transmits,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "router_solicitations",
.data = &ipv6_devconf.rtr_solicits,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "router_solicitation_interval",
.data = &ipv6_devconf.rtr_solicit_interval,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .proc_handler = addrconf_proc_dointvec_jiffies,
},
{
.procname = "router_solicitation_delay",
.data = &ipv6_devconf.rtr_solicit_delay,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .proc_handler = addrconf_proc_dointvec_jiffies,
},
{
.procname = "force_mld_version",
.data = &ipv6_devconf.force_mld_version,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
#ifdef CONFIG_IPV6_PRIVACY
{
@@ -4340,35 +4427,35 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.use_tempaddr,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "temp_valid_lft",
.data = &ipv6_devconf.temp_valid_lft,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "temp_prefered_lft",
.data = &ipv6_devconf.temp_prefered_lft,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "regen_max_retry",
.data = &ipv6_devconf.regen_max_retry,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "max_desync_factor",
.data = &ipv6_devconf.max_desync_factor,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
#endif
{
@@ -4376,21 +4463,21 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.max_addresses,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "accept_ra_defrtr",
.data = &ipv6_devconf.accept_ra_defrtr,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
#ifdef CONFIG_IPV6_ROUTER_PREF
{
@@ -4398,14 +4485,14 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.accept_ra_rtr_pref,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "router_probe_interval",
.data = &ipv6_devconf.rtr_probe_interval,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .proc_handler = addrconf_proc_dointvec_jiffies,
},
#ifdef CONFIG_IPV6_ROUTE_INFO
{
@@ -4413,7 +4500,7 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.accept_ra_rt_info_max_plen,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
#endif
#endif
@@ -4422,14 +4509,14 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.proxy_ndp,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "accept_source_route",
.data = &ipv6_devconf.accept_source_route,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
{
@@ -4437,7 +4524,7 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.optimistic_dad,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
#endif
@@ -4447,7 +4534,7 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.mc_forwarding,
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
#endif
{
@@ -4455,33 +4542,31 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.disable_ipv6,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = addrconf_sysctl_disable,
+ .proc_handler = addrconf_sysctl_disable__,
},
{
.procname = "accept_dad",
.data = &ipv6_devconf.accept_dad,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_proc_dointvec,
},
{
.procname = "force_tllao",
.data = &ipv6_devconf.force_tllao,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = addrconf_proc_dointvec
},
{
/* sentinel */
}
- },
};
static int __addrconf_sysctl_register(struct net *net, char *dev_name,
struct inet6_dev *idev, struct ipv6_devconf *p)
{
- int i;
- struct addrconf_sysctl_table *t;
+ struct addrconf_sysctl *t = &p->addrconf_sysctl;
#define ADDRCONF_CTL_PATH_DEV 3
@@ -4494,16 +4579,6 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
};
- t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
- if (t == NULL)
- goto out;
-
- for (i = 0; t->addrconf_vars[i].data; i++) {
- t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf;
- t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
- t->addrconf_vars[i].extra2 = net;
- }
-
/*
* Make a copy of dev_name, because '.procname' is regarded as const
* by sysctl and we wouldn't want anyone to change it under our feet
@@ -4511,38 +4586,29 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
*/
t->dev_name = kstrdup(dev_name, GFP_KERNEL);
if (!t->dev_name)
- goto free;
+ goto out;
addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name;
- t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path,
- t->addrconf_vars);
+ t->sysctl_header = register_net_sysctl_table_pathdata(net,
+ addrconf_ctl_path, ipv6_addrconf_sysctl_table, net);
if (t->sysctl_header == NULL)
goto free_procname;
- p->sysctl = t;
return 0;
free_procname:
kfree(t->dev_name);
-free:
- kfree(t);
out:
return -ENOBUFS;
}
static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
{
- struct addrconf_sysctl_table *t;
-
- if (p->sysctl == NULL)
- return;
+ struct addrconf_sysctl *t = &p->addrconf_sysctl;
- t = p->sysctl;
- p->sysctl = NULL;
unregister_sysctl_table(t->sysctl_header);
kfree(t->dev_name);
- kfree(t);
}
static void addrconf_sysctl_register(struct inet6_dev *idev)
--
1.7.4.rc1.7.g2cf08.dirty
^ permalink raw reply related
* Re: [PATCH] include/net/genetlink.h: Allow genlmsg_cancel to accept a NULL argument
From: David Miller @ 2011-02-04 4:43 UTC (permalink / raw)
To: julia; +Cc: netdev, linux-kernel, paul.moore, kernel-janitors
In-Reply-To: <Pine.LNX.4.64.1102020659520.9302@ask.diku.dk>
From: Julia Lawall <julia@diku.dk>
Date: Wed, 2 Feb 2011 07:17:29 +0100 (CET)
> This pattern occurred in eg:
>
> net/netlabel/netlabel_unlabeled.c
>
> in the function netlbl_unlabel_staticlist_gen and in other netlabel code,
> as well as in net/wireless/nl80211.c, but with the function nl80211hdr_put
> instead of genlmsg_put. I submitted patches for all of these cases, so
> that is perhaps why you don't see them. But someone suggested to change
> genlmsg_cancel as well, to be as permissive as nlmsg_cancel.
>
> For nlmsg_cancel, there are two occurrences in
> net/netfilter/nf_conntrack_netlink.c where nlmsg_cancel is reachable with
> the second argument NULL.
>
> For nlmsg_cancel the ability to accept NULL as a second argument comes
> from the fact that it only calls nlmsg_trim, which does nothing if NULL is
> the second argument. nlmsg_trim is also called by nla_nest_cancel. There
> are many calls to nla_nest_cancel with NULL as the second argument in the
> directory net/sched, for example in the function gred_dump in
> net/sched/sch_gred.c. net/sched also contains a call to nlmsg_trim with
> NULL as the second argument, in the function flow_dump, in
> net/sched/cls_flow.c.
>
> The whole thing seems somewhat sloppy. I'm sure that all of the
> above-cited occurrences could be rewritten as outlined above to skip over
> the cancel/trim function.
Thanks for the analysis Julia.
I think the only safe thing to do in net-2.6 and -stable is to add
the NULL check to genlmsg_cancel() as your patch did.
If we later want to move things such that, consistently, we never
call *nlmsg_cancel() with a NULL second arg, that's fine.
I'll apply your genlmsg_cancel() patch, thanks Julia.
^ permalink raw reply
* Re: [PATCH] be2net: use device model DMA API
From: David Miller @ 2011-02-04 4:49 UTC (permalink / raw)
To: ajit.khaparde; +Cc: ivecera, netdev, sathya.perla, subramanian.seetharaman
In-Reply-To: <c6e8a0f7-efdf-498e-9647-b544bc4adf34@exht1.ad.emulex.com>
From: Ajit Khaparde <ajit.khaparde@emulex.com>
Date: Thu, 3 Feb 2011 22:39:25 -0600
> -----Original Message-----
>> From: David Miller [mailto:davem@davemloft.net]
>> Sent: Wednesday, February 02, 2011 4:57 PM
>> To: ivecera@redhat.com
>> Cc: netdev@vger.kernel.org; Perla, Sathya; Seetharaman, Subramanian; Bandi, Sarveshwar; Khaparde, Ajit
>> Subject: Re: [PATCH] be2net: use device model DMA API
>
>> From: Ivan Vecera <ivecera@redhat.com>
>> Date: Wed, 2 Feb 2011 19:05:12 +0100
>
>> > Use DMA API as PCI equivalents will be deprecated.
>> >
>> > Signed-off-by: Ivan Vecera <ivecera@redhat.com>
>
>> Looks good to me, can I get some review from the be2net maintainers?
>
> Looks good. Thanks.
>
> Acked-by: Ajit Khaparde <ajit.khaparde@emulex.com>
Applied, thanks.
^ permalink raw reply
* Re: 2.6.38-rc3-git1: Reported regressions 2.6.36 -> 2.6.37
From: Carlos R. Mafra @ 2011-02-04 7:05 UTC (permalink / raw)
To: Linus Torvalds
Cc: Keith Packard, Dave Airlie, Dave Airlie, Rafael J. Wysocki,
Takashi Iwai, Linux Kernel Mailing List, Maciej Rutecki,
Florian Mickler, Andrew Morton, Kernel Testers List,
Network Development, Linux ACPI, Linux PM List, Linux SCSI List,
Linux Wireless List, DRI
In-Reply-To: <AANLkTin-9a5Z3qq4t8UakRvgB1G3_CT2RLKMVaHXvnLr-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
On Thu 3.Feb'11 at 17:11:14 -0800, Linus Torvalds wrote:
> On Thu, Feb 3, 2011 at 5:05 PM, Keith Packard <keithp-aN4HjG94KOLQT0dZR+AlfA@public.gmane.org> wrote:
> >
> > The goal is to make it so that when you *do* set a mode, DPMS gets set
> > to ON (as the monitor will actually be "on" at that point). Here's a
> > patch which does the DPMS_ON precisely when setting a mode.
>
> Ok, patch looks sane, but it does leave me with the "what about the
> 'fb_changed' case?" question. Is that case basically guaranteed to not
> change any existing dpms state?
>
> > (note, this patch compiles, but is otherwise only lightly tested).
>
> Carlos? Takashi? Ignore my crazy patch, try this one instead. Does it
> fix things for you?
Yes! (tested on top of 2.6.38-rc3+).
Thanks to everyone involved!
^ permalink raw reply
* Re: [PATCH] tcp: Increase the initial congestion window to 10.
From: Alexander Zimmermann @ 2011-02-04 8:38 UTC (permalink / raw)
To: David Miller; +Cc: netdev, dccp, therbert
In-Reply-To: <20110202.170750.229739784.davem@davemloft.net>
[-- Attachment #1: Type: text/plain, Size: 1822 bytes --]
Hi David,
Am 03.02.2011 um 02:07 schrieb David Miller:
>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> ---
>
> I've left the DCCP code to keep using RFC3390 logic, if they
> wish to adopt this change in their code they can do so by
> simply deleting the rfc3390_bytes_to_packets() function and
> using TCP_INIT_CWND in their assignment.
>
> include/net/tcp.h | 12 +++---------
> net/dccp/ccids/ccid2.c | 9 +++++++++
> net/ipv4/tcp_input.c | 2 +-
> 3 files changed, 13 insertions(+), 10 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 9179111..7118668 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -196,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
> /* TCP thin-stream limits */
> #define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */
>
Could you add a reference to the draft?
> +/* TCP initial congestion window */
> +#define TCP_INIT_CWND 10
> +
> extern struct inet_timewait_death_row tcp_death_row;
>
> /* sysctl variables for tcp */
> @@ -799,15 +802,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
> /* Use define here intentionally to get WARN_ON location shown at the caller */
> #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out)
>
> -/*
> - * Convert RFC 3390 larger initial window into an equivalent number of packets.
> - * This is based on the numbers specified in RFC 5681, 3.1.
> - */
> -static inline u32 rfc3390_bytes_to_packets(const u32 smss)
> -{
> - return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
> -}
> -
> extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
> extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
>
[-- Attachment #2: Signierter Teil der Nachricht --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
^ permalink raw reply
* [PATCH 02/14] IPVS: Add sysctl_snat_reroute()
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
In preparation for not including sysctl_snat_reroute in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 20 ++++++++++++++++----
1 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c4c8199..3abc009 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -599,6 +599,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
return NF_DROP;
}
+#ifdef CONFIG_SYSCTL
+
+static int sysctl_snat_reroute(struct sk_buff *skb)
+{
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ return ipvs->sysctl_snat_reroute;
+}
+
+#else
+
+static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
+
+#endif
+
__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
{
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -633,15 +647,13 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
^ permalink raw reply related
* [PATCH 03/14] IPVS: Add sysctl_nat_icmp_send()
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
In preparation for not including sysctl_nat_icmp_send in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 3abc009..66bb68b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -607,9 +607,16 @@ static int sysctl_snat_reroute(struct sk_buff *skb)
return ipvs->sysctl_snat_reroute;
}
+static int sysctl_nat_icmp_send(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ return ipvs->sysctl_nat_icmp_send;
+}
+
#else
static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
+static int sysctl_nat_icmp_send(struct net *net) { return 0; }
#endif
@@ -1075,7 +1082,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
- struct netns_ipvs *ipvs;
EnterFunction(11);
@@ -1150,11 +1156,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if the packet belongs to an existing entry
*/
cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
- ipvs = net_ipvs(net);
if (likely(cp))
return handle_response(af, skb, pd, cp, iph.len);
- if (ipvs->sysctl_nat_icmp_send &&
+ if (sysctl_nat_icmp_send(net) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
--
1.7.2.3
^ permalink raw reply related
* [PATCH 04/14] IPVS: Add {sysctl_sync_threshold,period}()
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
In preparation for not including sysctl_sync_threshold in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/netns/ip_vs.h | 29 +++++++++++++++++++++++++++++
net/netfilter/ipvs/ip_vs_core.c | 10 +++++-----
net/netfilter/ipvs/ip_vs_ctl.c | 4 ++--
net/netfilter/ipvs/ip_vs_sync.c | 4 ++--
4 files changed, 38 insertions(+), 9 deletions(-)
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 259ebac..e205767 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -140,4 +140,33 @@ struct netns_ipvs {
struct net *net; /* Needed by timer routines */
};
+#define DEFAULT_SYNC_THRESHOLD 3
+#define DEFAULT_SYNC_PERIOD 50
+
+#ifdef CONFIG_SYSCTL
+
+static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_threshold[0];
+}
+
+static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_threshold[1];
+}
+
+#else
+
+static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
+{
+ return DEFAULT_SYNC_THRESHOLD;
+}
+
+static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
+{
+ return DEFAULT_SYNC_PERIOD;
+}
+
+#endif
+
#endif /* IP_VS_H_ */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 66bb68b..d7a7fcb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1635,15 +1635,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
- pkts = ipvs->sysctl_sync_threshold[0];
+ pkts = sysctl_sync_threshold(ipvs);
else
pkts = atomic_add_return(1, &cp->in_pkts);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
- (pkts % ipvs->sysctl_sync_threshold[1]
- == ipvs->sysctl_sync_threshold[0])) ||
+ (pkts % sysctl_sync_period(ipvs)
+ == sysctl_sync_threshold(ipvs))) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
@@ -1657,8 +1657,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (pkts % ipvs->sysctl_sync_threshold[1]
- == ipvs->sysctl_sync_threshold[0])) ||
+ (pkts % sysctl_sync_period(ipvs)
+ == sysctl_sync_threshold(ipvs))) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE) ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c73b0c8..60a3686 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3545,8 +3545,8 @@ int __net_init __ip_vs_control_init(struct net *net)
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
- ipvs->sysctl_sync_threshold[0] = 3;
- ipvs->sysctl_sync_threshold[1] = 50;
+ ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
+ ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 2a2a836..7f8998e 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -650,7 +650,7 @@ control:
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
int pkts = atomic_add_return(1, &cp->in_pkts);
- if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
+ if (pkts % sysctl_sync_period(ipvs) != 1)
return;
}
goto sloop;
@@ -795,7 +795,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
- atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
+ atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
cp->state = state;
cp->old_state = cp->state;
/*
--
1.7.2.3
^ permalink raw reply related
* [PATCH 05/14] IPVS: Add sysctl_sync_ver()
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
In preparation for not including sysctl_sync_ver in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/netns/ip_vs.h | 11 +++++++++++
net/netfilter/ipvs/ip_vs_sync.c | 4 ++--
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index e205767..f7162e5 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -142,6 +142,7 @@ struct netns_ipvs {
#define DEFAULT_SYNC_THRESHOLD 3
#define DEFAULT_SYNC_PERIOD 50
+#define DEFAULT_SYNC_VER 1
#ifdef CONFIG_SYSCTL
@@ -155,6 +156,11 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
return ipvs->sysctl_sync_threshold[1];
}
+static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_ver;
+}
+
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -167,6 +173,11 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
return DEFAULT_SYNC_PERIOD;
}
+static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
+{
+ return DEFAULT_SYNC_VER;
+}
+
#endif
#endif /* IP_VS_H_ */
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 7f8998e..52661da 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode)
if (!ipvs->sync_state & IP_VS_STATE_MASTER)
return;
- if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
+ if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
return;
spin_lock_bh(&ipvs->sync_buff_lock);
@@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
unsigned int len, pe_name_len, pad;
/* Handle old version of the protocol */
- if (ipvs->sysctl_sync_ver == 0) {
+ if (sysctl_sync_ver(ipvs) == 0) {
ip_vs_sync_conn_v0(net, cp);
return;
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 06/14] IPVS: Add sysctl_expire_nodest_conn()
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
In preparation for not including sysctl_expire_nodest_conn in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d7a7fcb..ac13db1 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -613,10 +613,16 @@ static int sysctl_nat_icmp_send(struct net *net)
return ipvs->sysctl_nat_icmp_send;
}
+static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_expire_nodest_conn;
+}
+
#else
static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
static int sysctl_nat_icmp_send(struct net *net) { return 0; }
+static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
#endif
@@ -1605,7 +1611,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
- if (ipvs->sysctl_expire_nodest_conn) {
+ if (sysctl_expire_nodest_conn(ipvs)) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 07/14] IPVS: Add expire_quiescent_template()
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
In preparation for not including sysctl_expire_quiescent_template in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_conn.c | 13 +++++++++++--
1 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 83233fe..cc3b45b 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -677,6 +677,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
atomic_dec(&dest->refcnt);
}
+static int expire_quiescent_template(struct netns_ipvs *ipvs,
+ struct ip_vs_dest *dest)
+{
+#ifdef CONFIG_SYSCTL
+ return ipvs->sysctl_expire_quiescent_template &&
+ (atomic_read(&dest->weight) == 0);
+#else
+ return 0;
+#endif
+}
/*
* Checking if the destination of a connection template is available.
@@ -693,8 +703,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
*/
if ((dest == NULL) ||
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
- (ipvs->sysctl_expire_quiescent_template &&
- (atomic_read(&dest->weight) == 0))) {
+ expire_quiescent_template(ipvs, dest)) {
IP_VS_DBG_BUF(9, "check_template: dest not available for "
"protocol %s s:%s:%d v:%s:%d "
"-> d:%s:%d\n",
--
1.7.2.3
^ permalink raw reply related
* [PATCH 08/14] IPVS: Conditionally use sysctl_lblc{r}_expiration
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
In preparation for not including sysctl_lblc{r}_expiration in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_lblc.c | 16 +++++++++++++---
net/netfilter/ipvs/ip_vs_lblcr.c | 21 +++++++++++++++------
2 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 00b5ffa..7c5ef3f 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -63,6 +63,8 @@
#define CHECK_EXPIRE_INTERVAL (60*HZ)
#define ENTRY_TIMEOUT (6*60*HZ)
+#define DEFAULT_EXPIRATION (24*60*60*HZ)
+
/*
* It is for full expiration check.
* When there is no partial expiration check (garbage collection)
@@ -238,6 +240,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
}
}
+static int sysctl_lblc_expiration(struct ip_vs_service *svc)
+{
+#ifdef CONFIG_SYSCTL
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ return ipvs->sysctl_lblc_expiration;
+#else
+ return DEFAULT_EXPIRATION;
+#endif
+}
static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
{
@@ -245,7 +256,6 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -254,7 +264,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
en->lastuse +
- ipvs->sysctl_lblc_expiration))
+ sysctl_lblc_expiration(svc)))
continue;
ip_vs_lblc_free(en);
@@ -557,7 +567,7 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
return -ENOMEM;
} else
ipvs->lblc_ctl_table = vs_vars_table;
- ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+ ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index bfa25f1..484a700 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -63,6 +63,8 @@
#define CHECK_EXPIRE_INTERVAL (60*HZ)
#define ENTRY_TIMEOUT (6*60*HZ)
+#define DEFAULT_EXPIRATION (24*60*60*HZ)
+
/*
* It is for full expiration check.
* When there is no partial expiration check (garbage collection)
@@ -414,6 +416,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
}
}
+static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
+{
+#ifdef CONFIG_SYSCTL
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ return ipvs->sysctl_lblcr_expiration;
+#else
+ return DEFAULT_EXPIRATION;
+#endif
+}
static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
@@ -421,15 +432,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
- if (time_after(en->lastuse
- + ipvs->sysctl_lblcr_expiration, now))
+ if (time_after(en->lastuse +
+ sysctl_lblcr_expiration(svc), now))
continue;
ip_vs_lblcr_free(en);
@@ -661,7 +671,6 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
if (en) {
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
/* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
@@ -673,7 +682,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
/* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
time_after(jiffies, en->set.lastmod +
- ipvs->sysctl_lblcr_expiration)) {
+ sysctl_lblcr_expiration(svc))) {
struct ip_vs_dest *m;
write_lock(&en->set.lock);
@@ -757,7 +766,7 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
return -ENOMEM;
} else
ipvs->lblcr_ctl_table = vs_vars_table;
- ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
+ ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
#ifdef CONFIG_SYSCTL
--
1.7.2.3
^ permalink raw reply related
* [PATCH 09/14] IPVS: ip_vs_todrop() becomes a noop when CONFIG_SYSCTL is undefined
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 5d75fea..e046e8b 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1104,6 +1104,7 @@ extern int ip_vs_icmp_xmit_v6
int offset);
#endif
+#ifdef CONFIG_SYSCTL
/*
* This is a simple mechanism to ignore packets when
* we are loaded. Just set ip_vs_drop_rate to 'n' and
@@ -1119,6 +1120,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
ipvs->drop_counter = ipvs->drop_rate;
return 1;
}
+#else
+static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
+#endif
/*
* ip_vs_fwd_tag returns the forwarding tag of the connection
--
1.7.2.3
^ permalink raw reply related
* [PATCH 12/14] IPVS: Conditionally define and use ip_vs_lblc{r}_table
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
ip_vs_lblc_table and ip_vs_lblcr_table, and code that uses them
are unnecessary when CONFIG_SYSCTL is undefined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_lblc.c | 15 ++++++++++-----
net/netfilter/ipvs/ip_vs_lblcr.c | 14 ++++++++++----
2 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 7c5ef3f..8ab0603 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -114,7 +114,7 @@ struct ip_vs_lblc_table {
/*
* IPVS LBLC sysctl table
*/
-
+#ifdef CONFIG_SYSCTL
static ctl_table vs_vars_table[] = {
{
.procname = "lblc_expiration",
@@ -125,6 +125,7 @@ static ctl_table vs_vars_table[] = {
},
{ }
};
+#endif
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
@@ -555,6 +556,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
/*
* per netns init.
*/
+#ifdef CONFIG_SYSCTL
static int __net_init __ip_vs_lblc_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -570,7 +572,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
-#ifdef CONFIG_SYSCTL
ipvs->lblc_ctl_header =
register_net_sysctl_table(net, net_vs_ctl_path,
ipvs->lblc_ctl_table);
@@ -579,7 +580,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
kfree(ipvs->lblc_ctl_table);
return -ENOMEM;
}
-#endif
return 0;
}
@@ -588,14 +588,19 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
-#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipvs->lblc_ctl_header);
-#endif
if (!net_eq(net, &init_net))
kfree(ipvs->lblc_ctl_table);
}
+#else
+
+static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
+
+#endif
+
static struct pernet_operations ip_vs_lblc_ops = {
.init = __ip_vs_lblc_init,
.exit = __ip_vs_lblc_exit,
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 484a700..241d1ef 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -289,6 +289,7 @@ struct ip_vs_lblcr_table {
};
+#ifdef CONFIG_SYSCTL
/*
* IPVS LBLCR sysctl table
*/
@@ -303,6 +304,7 @@ static ctl_table vs_vars_table[] = {
},
{ }
};
+#endif
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
@@ -754,6 +756,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
/*
* per netns init.
*/
+#ifdef CONFIG_SYSCTL
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -769,7 +772,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
-#ifdef CONFIG_SYSCTL
ipvs->lblcr_ctl_header =
register_net_sysctl_table(net, net_vs_ctl_path,
ipvs->lblcr_ctl_table);
@@ -778,7 +780,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
kfree(ipvs->lblcr_ctl_table);
return -ENOMEM;
}
-#endif
return 0;
}
@@ -787,14 +788,19 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
-#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
-#endif
if (!net_eq(net, &init_net))
kfree(ipvs->lblcr_ctl_table);
}
+#else
+
+static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
+
+#endif
+
static struct pernet_operations ip_vs_lblcr_ops = {
.init = __ip_vs_lblcr_init,
.exit = __ip_vs_lblcr_exit,
--
1.7.2.3
^ permalink raw reply related
* [PATCH 13/14] IPVS: Add __ip_vs_control_{init,cleanup}_sysctl()
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
Break out the portions of __ip_vs_control_init() and
__ip_vs_control_cleanup() that aren't necessary when
CONFIG_SYSCTL is undefined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_ctl.c | 110 +++++++++++++++++++++++++---------------
1 files changed, 69 insertions(+), 41 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 60a3686..e06e2af 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -88,6 +88,8 @@ static int __ip_vs_addr_is_local_v6(struct net *net,
return 0;
}
#endif
+
+#ifdef CONFIG_SYSCTL
/*
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
@@ -229,6 +231,7 @@ static void defense_work_handler(struct work_struct *work)
ip_vs_random_dropentry(ipvs->net);
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
+#endif
int
ip_vs_use_count_inc(void)
@@ -1483,7 +1486,7 @@ static int ip_vs_zero_all(struct net *net)
return 0;
}
-
+#ifdef CONFIG_SYSCTL
static int
proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1505,7 +1508,6 @@ proc_do_defense_mode(ctl_table *table, int write,
return rc;
}
-
static int
proc_do_sync_threshold(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1739,6 +1741,7 @@ const struct ctl_path net_vs_ctl_path[] = {
{ }
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
+#endif
#ifdef CONFIG_PROC_FS
@@ -3482,7 +3485,8 @@ static void ip_vs_genl_unregister(void)
/*
* per netns intit/exit func.
*/
-int __net_init __ip_vs_control_init(struct net *net)
+#ifdef CONFIG_SYSCTL
+int __net_init __ip_vs_control_init_sysctl(struct net *net)
{
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3492,38 +3496,11 @@ int __net_init __ip_vs_control_init(struct net *net)
spin_lock_init(&ipvs->dropentry_lock);
spin_lock_init(&ipvs->droppacket_lock);
spin_lock_init(&ipvs->securetcp_lock);
- ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
-
- /* Initialize rs_table */
- for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
- INIT_LIST_HEAD(&ipvs->rs_table[idx]);
-
- INIT_LIST_HEAD(&ipvs->dest_trash);
- atomic_set(&ipvs->ftpsvc_counter, 0);
- atomic_set(&ipvs->nullsvc_counter, 0);
-
- /* procfs stats */
- ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
- if (ipvs->tot_stats == NULL) {
- pr_err("%s(): no memory.\n", __func__);
- return -ENOMEM;
- }
- ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
- if (!ipvs->cpustats) {
- pr_err("%s() alloc_percpu failed\n", __func__);
- goto err_alloc;
- }
- spin_lock_init(&ipvs->tot_stats->lock);
-
- proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
- proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
- proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
- &ip_vs_stats_percpu_fops);
if (!net_eq(net, &init_net)) {
tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
if (tbl == NULL)
- goto err_dup;
+ return -ENOMEM;
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
@@ -3552,24 +3529,79 @@ int __net_init __ip_vs_control_init(struct net *net)
tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
-#ifdef CONFIG_SYSCTL
ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
tbl);
if (ipvs->sysctl_hdr == NULL) {
if (!net_eq(net, &init_net))
kfree(tbl);
- goto err_dup;
+ return -ENOMEM;
}
-#endif
ip_vs_new_estimator(net, ipvs->tot_stats);
ipvs->sysctl_tbl = tbl;
/* Schedule defense work */
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+
+ return 0;
+}
+
+void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ cancel_delayed_work_sync(&ipvs->defense_work);
+ cancel_work_sync(&ipvs->defense_work.work);
+ unregister_net_sysctl_table(ipvs->sysctl_hdr);
+}
+
+#else
+
+int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
+void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
+
+#endif
+
+int __net_init __ip_vs_control_init(struct net *net)
+{
+ int idx;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (__ip_vs_control_init_sysctl(net))
+ return -ENOMEM;
+
+ ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+ /* Initialize rs_table */
+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+ INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+ INIT_LIST_HEAD(&ipvs->dest_trash);
+ atomic_set(&ipvs->ftpsvc_counter, 0);
+ atomic_set(&ipvs->nullsvc_counter, 0);
+
+ /* procfs stats */
+ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
+ if (ipvs->tot_stats == NULL) {
+ pr_err("%s(): no memory.\n", __func__);
+ return -ENOMEM;
+ }
+ ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+ if (!ipvs->cpustats) {
+ pr_err("%s() alloc_percpu failed\n", __func__);
+ goto err_alloc;
+ }
+ spin_lock_init(&ipvs->tot_stats->lock);
+
+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+ INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+ proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+ proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+ proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+ &ip_vs_stats_percpu_fops);
+
return 0;
-err_dup:
- free_percpu(ipvs->cpustats);
err_alloc:
kfree(ipvs->tot_stats);
return -ENOMEM;
@@ -3581,11 +3613,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
ip_vs_trash_cleanup(net);
ip_vs_kill_estimator(net, ipvs->tot_stats);
- cancel_delayed_work_sync(&ipvs->defense_work);
- cancel_work_sync(&ipvs->defense_work.work);
-#ifdef CONFIG_SYSCTL
- unregister_net_sysctl_table(ipvs->sysctl_hdr);
-#endif
+ __ip_vs_control_cleanup_sysctl(net);
proc_net_remove(net, "ip_vs_stats_percpu");
proc_net_remove(net, "ip_vs_stats");
proc_net_remove(net, "ip_vs");
--
1.7.2.3
^ permalink raw reply related
* [PATCH 14/14] IPVS: Conditionally include sysctl members of struct netns_ipvs
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
There is now no need to include sysctl members of struct netns_ipvs
unless CONFIG_SYSCTL is defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/netns/ip_vs.h | 21 +++++++++++++--------
1 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index f7162e5..8e3e008 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -70,6 +70,17 @@ struct netns_ipvs {
seqcount_t *ustats_seq; /* u64 read retry */
int num_services; /* no of virtual services */
+
+ rwlock_t rs_lock; /* real services table */
+ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+ struct lock_class_key ctl_key; /* ctl_mutex debuging */
+ /* Trash for destinations */
+ struct list_head dest_trash;
+ /* Service counters */
+ atomic_t ftpsvc_counter;
+ atomic_t nullsvc_counter;
+
+#ifdef CONFIG_SYSCTL
/* 1/rate drop and drop-entry variables */
struct delayed_work defense_work; /* Work handler */
int drop_rate;
@@ -79,14 +90,6 @@ struct netns_ipvs {
spinlock_t dropentry_lock; /* drop entry handling */
spinlock_t droppacket_lock; /* drop packet handling */
spinlock_t securetcp_lock; /* state and timeout tables */
- rwlock_t rs_lock; /* real services table */
- /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
- struct lock_class_key ctl_key; /* ctl_mutex debuging */
- /* Trash for destinations */
- struct list_head dest_trash;
- /* Service counters */
- atomic_t ftpsvc_counter;
- atomic_t nullsvc_counter;
/* sys-ctl struct */
struct ctl_table_header *sysctl_hdr;
@@ -116,6 +119,8 @@ struct netns_ipvs {
int sysctl_lblcr_expiration;
struct ctl_table_header *lblcr_ctl_header;
struct ctl_table *lblcr_ctl_table;
+#endif
+
/* ip_vs_est */
struct list_head est_list; /* estimator list */
spinlock_t est_lock;
--
1.7.2.3
^ permalink raw reply related
* [rfc] IPVS: Conditionally include sysctl code
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom
In the case where CONFIG_SYSCTL is undefined, the related
control code in IPVS is unnecessary. This patch series
attempts to make the inclusion of all such code
conditional on CONFIG_SYSCTL.
The changes are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-test-2.6.git config-sysctl
I consider these changes as an RFC at this stage and would
appreciate review or other feedback.
If this series is successful I will examine what, if any, similar
changes are appropriate for the case where CONFIG_PROC_FS is undefined.
Simon Horman (14):
IPVS: Add ip_vs_route_me_harder()
IPVS: Add sysctl_snat_reroute()
IPVS: Add sysctl_nat_icmp_send()
IPVS: Add {sysctl_sync_threshold,period}()
IPVS: Add sysctl_sync_ver()
IPVS: Add sysctl_expire_nodest_conn()
IPVS: Add expire_quiescent_template()
IPVS: Conditionally use sysctl_lblc{r}_expiration
IPVS: ip_vs_todrop() becomes a noop when CONFIG_SYSCTL is undefined
IPVS: Conditional ip_vs_conntrack_enabled()
IPVS: Minimise ip_vs_leave when CONFIG_SYSCTL is undefined
IPVS: Conditionally define and use ip_vs_lblc{r}_table
IPVS: Add __ip_vs_control_{init,cleanup}_sysctl()
IPVS: Conditionally include sysctl members of struct netns_ipvs
include/net/ip_vs.h | 8 +++
include/net/netns/ip_vs.h | 61 ++++++++++++++++++---
net/netfilter/ipvs/ip_vs_conn.c | 13 ++++-
net/netfilter/ipvs/ip_vs_core.c | 98 ++++++++++++++++++++------------
net/netfilter/ipvs/ip_vs_ctl.c | 114 +++++++++++++++++++++++--------------
net/netfilter/ipvs/ip_vs_lblc.c | 31 ++++++++---
net/netfilter/ipvs/ip_vs_lblcr.c | 35 ++++++++---
net/netfilter/ipvs/ip_vs_sync.c | 8 +-
8 files changed, 256 insertions(+), 112 deletions(-)
^ permalink raw reply
* [PATCH 01/14] IPVS: Add ip_vs_route_me_harder()
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
Add ip_vs_route_me_harder() to avoid repeating the same code twice.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 48 +++++++++++++++++---------------------
1 files changed, 22 insertions(+), 26 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4d06617..c4c8199 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -631,6 +631,24 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
}
#endif
+static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
+{
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
+ return 1;
+ } else
+#endif
+ if ((ipvs->sysctl_snat_reroute ||
+ skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
+ ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ return 1;
+
+ return 0;
+}
+
/*
* Packet has been made sufficiently writable in caller
* - inout: 1=in->out, 0=out->in
@@ -737,7 +755,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
unsigned int offset, unsigned int ihl)
{
- struct netns_ipvs *ipvs;
unsigned int verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -759,8 +776,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (!skb_make_writable(skb, offset))
goto out;
- ipvs = net_ipvs(skb_net(skb));
-
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -768,16 +783,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#endif
ip_vs_nat_icmp(skb, pp, cp, 1);
-#ifdef CONFIG_IP_VS_IPV6
- if (af == AF_INET6) {
- if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
- goto out;
- } else
-#endif
- if ((ipvs->sysctl_snat_reroute ||
- skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
- ip_route_me_harder(skb, RTN_LOCAL) != 0)
- goto out;
+ if (ip_vs_route_me_harder(af, skb))
+ goto out;
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);
@@ -986,7 +993,6 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_conn *cp, int ihl)
{
struct ip_vs_protocol *pp = pd->pp;
- struct netns_ipvs *ipvs;
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
@@ -1022,18 +1028,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* if it came from this machine itself. So re-compute
* the routing information.
*/
- ipvs = net_ipvs(skb_net(skb));
-
-#ifdef CONFIG_IP_VS_IPV6
- if (af == AF_INET6) {
- if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
- goto drop;
- } else
-#endif
- if ((ipvs->sysctl_snat_reroute ||
- skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
- ip_route_me_harder(skb, RTN_LOCAL) != 0)
- goto drop;
+ if (ip_vs_route_me_harder(af, skb))
+ goto drop;
IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
--
1.7.2.3
^ permalink raw reply related
* [PATCH 10/14] IPVS: Conditional ip_vs_conntrack_enabled()
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
ip_vs_conntrack_enabled() becomes a noop when CONFIG_SYSCTL is undefined.
In preparation for not including sysctl_conntrack in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index e046e8b..48ba292 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1210,7 +1210,11 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
*/
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
+#ifdef CONFIG_SYSCTL
return ipvs->sysctl_conntrack;
+#else
+ return 0;
+#endif
}
extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
--
1.7.2.3
^ permalink raw reply related
* [PATCH 11/14] IPVS: Minimise ip_vs_leave when CONFIG_SYSCTL is undefined
From: Simon Horman @ 2011-02-04 9:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In-Reply-To: <1296812782-14953-1-git-send-email-horms@verge.net.au>
Much of ip_vs_leave() is unnecessary if CONFIG_SYSCTL is undefined.
I tried an approach of breaking the now #ifdef'ed portions out
into a separate function. However this appeared to grow the
compiled code on x86_64 by about 200 bytes in the case where
CONFIG_SYSCTL is defined. So I have gone with the simpler though
less elegant #ifdef'ed solution for now.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 9 +++++++--
1 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index ac13db1..ba1f039 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -499,11 +499,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd)
{
- struct net *net;
- struct netns_ipvs *ipvs;
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
+#ifdef CONFIG_SYSCTL
+ struct net *net;
+ struct netns_ipvs *ipvs;
int unicast;
+#endif
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
@@ -512,6 +514,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_service_put(svc);
return NF_DROP;
}
+
+#ifdef CONFIG_SYSCTL
net = skb_net(skb);
#ifdef CONFIG_IP_VS_IPV6
@@ -563,6 +567,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_put(cp);
return ret;
}
+#endif
/*
* When the virtual ftp service is presented, packets destined
--
1.7.2.3
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox