* [PATCH 13/18] netfilter: nf_ct_icmp: add icmp_kmemdup[_compat]_sysctl_table function
From: pablo @ 2012-07-06 11:17 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1341573428-3204-1-git-send-email-pablo@netfilter.org>
From: Gao feng <gaofeng@cn.fujitsu.com>
Split sysctl function into smaller chunks to clean up code and prepare
patches to reduce ifdef pollution.
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 41 ++++++++++++++++++++------
1 file changed, 32 insertions(+), 9 deletions(-)
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 76f7a2f..9c2095c 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -337,34 +337,57 @@ static struct ctl_table icmp_compat_sysctl_table[] = {
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
-static int icmp_init_net(struct net *net, u_int16_t proto)
+static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
{
- struct nf_icmp_net *in = icmp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)in;
- in->timeout = nf_ct_icmp_timeout;
-
#ifdef CONFIG_SYSCTL
pn->ctl_table = kmemdup(icmp_sysctl_table,
sizeof(icmp_sysctl_table),
GFP_KERNEL);
if (!pn->ctl_table)
return -ENOMEM;
+
pn->ctl_table[0].data = &in->timeout;
+#endif
+ return 0;
+}
+
+static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
sizeof(icmp_compat_sysctl_table),
GFP_KERNEL);
- if (!pn->ctl_compat_table) {
- kfree(pn->ctl_table);
- pn->ctl_table = NULL;
+ if (!pn->ctl_compat_table)
return -ENOMEM;
- }
+
pn->ctl_compat_table[0].data = &in->timeout;
#endif
#endif
return 0;
}
+static int icmp_init_net(struct net *net, u_int16_t proto)
+{
+ int ret;
+ struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmp_timeout;
+
+ ret = icmp_kmemdup_compat_sysctl_table(pn, in);
+ if (ret < 0)
+ return ret;
+
+ ret = icmp_kmemdup_sysctl_table(pn, in);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+
+ return ret;
+}
+
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
.l3proto = PF_INET,
--
1.7.10
^ permalink raw reply related
* [PATCH 09/18] netfilter: nf_ct_udplite: add udplite_kmemdup_sysctl_table function
From: pablo @ 2012-07-06 11:16 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1341573428-3204-1-git-send-email-pablo@netfilter.org>
From: Gao feng <gaofeng@cn.fujitsu.com>
This cleans up nf_conntrack_l4proto_udplite[4,6] and it prepares
the moving of the sysctl code to nf_conntrack_proto_*_sysctl.c
to reduce the ifdef pollution.
And use nf_proto_net.users to identify if it's the first time
we use the nf_proto_net, in that case, we initialize it.
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_proto_udplite.c | 43 +++++++++++++++++-----------
1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index d33e511..4b66df2 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -234,29 +234,38 @@ static struct ctl_table udplite_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
-static int udplite_init_net(struct net *net, u_int16_t proto)
+static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct udplite_net *un)
{
- int i;
- struct udplite_net *un = udplite_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)un;
#ifdef CONFIG_SYSCTL
- if (!pn->ctl_table) {
-#else
- if (!pn->users++) {
+ if (pn->ctl_table)
+ return 0;
+
+ pn->ctl_table = kmemdup(udplite_sysctl_table,
+ sizeof(udplite_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &un->timeouts[UDPLITE_CT_UNREPLIED];
+ pn->ctl_table[1].data = &un->timeouts[UDPLITE_CT_REPLIED];
#endif
+ return 0;
+}
+
+static int udplite_init_net(struct net *net, u_int16_t proto)
+{
+ struct udplite_net *un = udplite_pernet(net);
+ struct nf_proto_net *pn = &un->pn;
+
+ if (!pn->users) {
+ int i;
+
for (i = 0 ; i < UDPLITE_CT_MAX; i++)
un->timeouts[i] = udplite_timeouts[i];
-#ifdef CONFIG_SYSCTL
- pn->ctl_table = kmemdup(udplite_sysctl_table,
- sizeof(udplite_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
- pn->ctl_table[0].data = &un->timeouts[UDPLITE_CT_UNREPLIED];
- pn->ctl_table[1].data = &un->timeouts[UDPLITE_CT_REPLIED];
-#endif
}
- return 0;
+
+ return udplite_kmemdup_sysctl_table(pn, un);
}
static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
--
1.7.10
^ permalink raw reply related
* [PATCH 07/18] netfilter: nf_ct_tcp: merge tcpv[4,6]_net_init into tcp_net_init
From: pablo @ 2012-07-06 11:16 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1341573428-3204-1-git-send-email-pablo@netfilter.org>
From: Gao feng <gaofeng@cn.fujitsu.com>
Merge tcpv4_net_init and tcpv6_net_init into tcp_net_init to
remove redundant code now that we have the u_int16_t proto
parameter.
And use nf_proto_net.users to identify if it's the first time
we use the nf_proto_net, in that case, we initialize it.
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_proto_tcp.c | 71 ++++++++++----------------------
1 file changed, 21 insertions(+), 50 deletions(-)
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 6db9d3c..44f0da8 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1533,11 +1533,10 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
-static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn)
+static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
- struct nf_tcp_net *tn = (struct nf_tcp_net *)pn;
-
if (pn->ctl_table)
return 0;
@@ -1564,11 +1563,11 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn)
return 0;
}
-static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
+static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- struct nf_tcp_net *tn = (struct nf_tcp_net *)pn;
pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
sizeof(tcp_compat_sysctl_table),
GFP_KERNEL);
@@ -1593,18 +1592,15 @@ static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
return 0;
}
-static int tcpv4_init_net(struct net *net, u_int16_t proto)
+static int tcp_init_net(struct net *net, u_int16_t proto)
{
- int i;
- int ret = 0;
+ int ret;
struct nf_tcp_net *tn = tcp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)tn;
+ struct nf_proto_net *pn = &tn->pn;
+
+ if (!pn->users) {
+ int i;
-#ifdef CONFIG_SYSCTL
- if (!pn->ctl_table) {
-#else
- if (!pn->users++) {
-#endif
for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
tn->timeouts[i] = tcp_timeouts[i];
@@ -1613,45 +1609,20 @@ static int tcpv4_init_net(struct net *net, u_int16_t proto)
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
}
- ret = tcp_kmemdup_compat_sysctl_table(pn);
-
- if (ret < 0)
- return ret;
+ if (proto == AF_INET) {
+ ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
+ if (ret < 0)
+ return ret;
- ret = tcp_kmemdup_sysctl_table(pn);
+ ret = tcp_kmemdup_sysctl_table(pn, tn);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+ } else
+ ret = tcp_kmemdup_sysctl_table(pn, tn);
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (ret < 0) {
- kfree(pn->ctl_compat_table);
- pn->ctl_compat_table = NULL;
- }
-#endif
-#endif
return ret;
}
-static int tcpv6_init_net(struct net *net, u_int16_t proto)
-{
- int i;
- struct nf_tcp_net *tn = tcp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)tn;
-
-#ifdef CONFIG_SYSCTL
- if (!pn->ctl_table) {
-#else
- if (!pn->users++) {
-#endif
- for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
- tn->timeouts[i] = tcp_timeouts[i];
- tn->tcp_loose = nf_ct_tcp_loose;
- tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
- tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
- }
-
- return tcp_kmemdup_sysctl_table(pn);
-}
-
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
.l3proto = PF_INET,
@@ -1684,7 +1655,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.nla_policy = tcp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .init_net = tcpv4_init_net,
+ .init_net = tcp_init_net,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
@@ -1720,6 +1691,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.nla_policy = tcp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .init_net = tcpv6_init_net,
+ .init_net = tcp_init_net,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
--
1.7.10
^ permalink raw reply related
* [PATCH 05/18] netfilter: nf_conntrack: use l4proto->users as refcount for per-net data
From: pablo @ 2012-07-06 11:16 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1341573428-3204-1-git-send-email-pablo@netfilter.org>
From: Gao feng <gaofeng@cn.fujitsu.com>
Currently, nf_proto_net's l4proto->users meaning is quite confusing
since it depends on the compilation tweaks.
To resolve this, we cleanup this code to regard it as the refcount
for l4proto's per-net data, since two l4protos may use the
same per-net data.
Thus, we increment pn->users when nf_conntrack_l4proto_register
succeeds, and decrement it in the nf_conntrack_l4proto_unregister case.
The users refcnt is not required for layer 3 protocol trackers.
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_proto.c | 76 ++++++++++++++++++++++--------------
1 file changed, 46 insertions(+), 30 deletions(-)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 9d6b6ab..63612e6 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -39,16 +39,13 @@ static int
nf_ct_register_sysctl(struct net *net,
struct ctl_table_header **header,
const char *path,
- struct ctl_table *table,
- unsigned int *users)
+ struct ctl_table *table)
{
if (*header == NULL) {
*header = register_net_sysctl(net, path, table);
if (*header == NULL)
return -ENOMEM;
}
- if (users != NULL)
- (*users)++;
return 0;
}
@@ -56,9 +53,9 @@ nf_ct_register_sysctl(struct net *net,
static void
nf_ct_unregister_sysctl(struct ctl_table_header **header,
struct ctl_table **table,
- unsigned int *users)
+ unsigned int users)
{
- if (users != NULL && --*users > 0)
+ if (users > 0)
return;
unregister_net_sysctl_table(*header);
@@ -191,8 +188,7 @@ static int nf_ct_l3proto_register_sysctl(struct net *net,
err = nf_ct_register_sysctl(net,
&in->ctl_table_header,
l3proto->ctl_table_path,
- in->ctl_table,
- NULL);
+ in->ctl_table);
if (err < 0) {
kfree(in->ctl_table);
in->ctl_table = NULL;
@@ -213,7 +209,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct net *net,
if (in->ctl_table_header != NULL)
nf_ct_unregister_sysctl(&in->ctl_table_header,
&in->ctl_table,
- NULL);
+ 0);
#endif
}
@@ -329,20 +325,17 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
static
int nf_ct_l4proto_register_sysctl(struct net *net,
+ struct nf_proto_net *pn,
struct nf_conntrack_l4proto *l4proto)
{
int err = 0;
- struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
- if (pn == NULL)
- return 0;
#ifdef CONFIG_SYSCTL
if (pn->ctl_table != NULL) {
err = nf_ct_register_sysctl(net,
&pn->ctl_table_header,
"net/netfilter",
- pn->ctl_table,
- &pn->users);
+ pn->ctl_table);
if (err < 0) {
if (!pn->users) {
kfree(pn->ctl_table);
@@ -356,15 +349,14 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
err = nf_ct_register_sysctl(net,
&pn->ctl_compat_header,
"net/ipv4/netfilter",
- pn->ctl_compat_table,
- NULL);
+ pn->ctl_compat_table);
if (err == 0)
goto out;
nf_ct_kfree_compat_sysctl_table(pn);
nf_ct_unregister_sysctl(&pn->ctl_table_header,
&pn->ctl_table,
- &pn->users);
+ pn->users);
}
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
out:
@@ -374,25 +366,21 @@ out:
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
+ struct nf_proto_net *pn,
struct nf_conntrack_l4proto *l4proto)
{
- struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
- if (pn == NULL)
- return;
#ifdef CONFIG_SYSCTL
if (pn->ctl_table_header != NULL)
nf_ct_unregister_sysctl(&pn->ctl_table_header,
&pn->ctl_table,
- &pn->users);
+ pn->users);
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL)
nf_ct_unregister_sysctl(&pn->ctl_compat_header,
&pn->ctl_compat_table,
- NULL);
+ 0);
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
-#else
- pn->users--;
#endif /* CONFIG_SYSCTL */
}
@@ -458,23 +446,32 @@ int nf_conntrack_l4proto_register(struct net *net,
struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
+ struct nf_proto_net *pn = NULL;
if (l4proto->init_net) {
ret = l4proto->init_net(net, l4proto->l3proto);
if (ret < 0)
- return ret;
+ goto out;
}
- ret = nf_ct_l4proto_register_sysctl(net, l4proto);
+ pn = nf_ct_l4proto_net(net, l4proto);
+ if (pn == NULL)
+ goto out;
+
+ ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto);
if (ret < 0)
- return ret;
+ goto out;
if (net == &init_net) {
ret = nf_conntrack_l4proto_register_net(l4proto);
- if (ret < 0)
- nf_ct_l4proto_unregister_sysctl(net, l4proto);
+ if (ret < 0) {
+ nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
+ goto out;
+ }
}
+ pn->users++;
+out:
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
@@ -499,10 +496,18 @@ nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto)
void nf_conntrack_l4proto_unregister(struct net *net,
struct nf_conntrack_l4proto *l4proto)
{
+ struct nf_proto_net *pn = NULL;
+
if (net == &init_net)
nf_conntrack_l4proto_unregister_net(l4proto);
- nf_ct_l4proto_unregister_sysctl(net, l4proto);
+ pn = nf_ct_l4proto_net(net, l4proto);
+ if (pn == NULL)
+ return;
+
+ pn->users--;
+ nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
+
/* Remove all contrack entries for this protocol */
rtnl_lock();
nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
@@ -514,11 +519,15 @@ int nf_conntrack_proto_init(struct net *net)
{
unsigned int i;
int err;
+ struct nf_proto_net *pn = nf_ct_l4proto_net(net,
+ &nf_conntrack_l4proto_generic);
+
err = nf_conntrack_l4proto_generic.init_net(net,
nf_conntrack_l4proto_generic.l3proto);
if (err < 0)
return err;
err = nf_ct_l4proto_register_sysctl(net,
+ pn,
&nf_conntrack_l4proto_generic);
if (err < 0)
return err;
@@ -528,13 +537,20 @@ int nf_conntrack_proto_init(struct net *net)
rcu_assign_pointer(nf_ct_l3protos[i],
&nf_conntrack_l3proto_generic);
}
+
+ pn->users++;
return 0;
}
void nf_conntrack_proto_fini(struct net *net)
{
unsigned int i;
+ struct nf_proto_net *pn = nf_ct_l4proto_net(net,
+ &nf_conntrack_l4proto_generic);
+
+ pn->users--;
nf_ct_l4proto_unregister_sysctl(net,
+ pn,
&nf_conntrack_l4proto_generic);
if (net == &init_net) {
/* free l3proto protocol tables */
--
1.7.10
^ permalink raw reply related
* [PATCH 02/18] netfilter: nf_conntrack: fix nf_conntrack_l3proto_register
From: pablo @ 2012-07-06 11:16 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1341573428-3204-1-git-send-email-pablo@netfilter.org>
From: Gao feng <gaofeng@cn.fujitsu.com>
Before commit 2c352f444ccfa966a1aa4fd8e9ee29381c467448
(netfilter: nf_conntrack: prepare namespace support for
l4 protocol trackers), we registered sysctl before registering the
protocol tracker. Thus, if sysctl registration failed,
the protocol tracker was not registered.
After that commit, if sysctl registration fails, protocol
registration still remains, so we leave things in intermediate
state.
To fix this, this patch registers sysctl before protocols.
And if protocol registration fails, sysctl is unregistered.
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_proto.c | 36 +++++++++++++++++++++++-------------
1 file changed, 23 insertions(+), 13 deletions(-)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1ea9194..9bd88aa 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -253,18 +253,23 @@ int nf_conntrack_l3proto_register(struct net *net,
{
int ret = 0;
- if (net == &init_net)
- ret = nf_conntrack_l3proto_register_net(proto);
+ if (proto->init_net) {
+ ret = proto->init_net(net);
+ if (ret < 0)
+ return ret;
+ }
+ ret = nf_ct_l3proto_register_sysctl(net, proto);
if (ret < 0)
return ret;
- if (proto->init_net) {
- ret = proto->init_net(net);
+ if (net == &init_net) {
+ ret = nf_conntrack_l3proto_register_net(proto);
if (ret < 0)
- return ret;
+ nf_ct_l3proto_unregister_sysctl(net, proto);
}
- return nf_ct_l3proto_register_sysctl(net, proto);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
@@ -454,19 +459,24 @@ int nf_conntrack_l4proto_register(struct net *net,
struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
- if (net == &init_net)
- ret = nf_conntrack_l4proto_register_net(l4proto);
- if (ret < 0)
- return ret;
-
- if (l4proto->init_net)
+ if (l4proto->init_net) {
ret = l4proto->init_net(net);
+ if (ret < 0)
+ return ret;
+ }
+ ret = nf_ct_l4proto_register_sysctl(net, l4proto);
if (ret < 0)
return ret;
- return nf_ct_l4proto_register_sysctl(net, l4proto);
+ if (net == &init_net) {
+ ret = nf_conntrack_l4proto_register_net(l4proto);
+ if (ret < 0)
+ nf_ct_l4proto_unregister_sysctl(net, l4proto);
+ }
+
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
--
1.7.10
^ permalink raw reply related
* Re: [PATCH net-next 1/2] ipv6: Initialize the struct rt6_info behind the dst_enty field
From: David Miller @ 2012-07-06 10:57 UTC (permalink / raw)
To: steffen.klassert; +Cc: eric.dumazet, netdev
In-Reply-To: <20120706093709.GH1869@secunet.com>
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 6 Jul 2012 11:37:09 +0200
> + new = (struct dst_entry *)rt;
>
> - new = &rt->dst;
Please do not fight the typing system, the existing "new = &rt->dst;"
assignment is the correct way to do this.
The same issue should be fixed in patch #2 as well.
Thank you.
^ permalink raw reply
* Hello! I guess you don't mind meeting an attractive and interesting girl, do you?)
From: Amberly Towles @ 2012-07-06 10:36 UTC (permalink / raw)
To: r_bracewell@yahoo.com
Hey, honey! How are u? What's new?
I am Amberly.
So, I don't know from what to start. Maybe from this..
One of friends of mine showed me ur pics and I felt in love them so much!
I'm searching for smart, sedate man for such a long time but there r so many dolts who are just banging and then leaving me.
I am sure u're from their quantity!
So, if you're a good fellow then I wait for your answer.
^ permalink raw reply
* Re: 3.4.1 and 3.5-rc1 Packet lost at 250Mb/s
From: Eric Dumazet @ 2012-07-06 10:13 UTC (permalink / raw)
To: adam.niescierowicz; +Cc: Netdev
In-Reply-To: <2d2806e8e8ecad5646524acf2c4c5bcf@justnet.pl>
On Fri, 2012-07-06 at 11:47 +0200, Nieścierowicz Adam wrote:
> Hello,
> Can I send something that will help determine the cause of the problem?
>
>
> W dniu 08.06.2012 11:41, Eric Dumazet napisał(a):
>
> > On Fri, 2012-06-08 at 10:58 +0200, Nieścierowicz Adam wrote:
> >
> >> Hello, recently we changed on the router kernel from 2.6.38.1 to
> >> 3.4.1
> >> and noticed 30% packet loss when traffic increases up to 250MB / s.
> >> Similar is for kernel 3.5-rc1 Here a link to ifstat
> >> http://wklej.org/id/767577/ [2]
> >
> > You should give as much as possible delails on your setup (hardware,
> > software)
> >
> > lspci
> > cat /proc/cpuinfo
> > cat /proc/interrupts
> > ifconfig -a
> > tc -s -d qdisc
> > dmesg
> > netstat -s
>
> currently running on 2.6.38.1 and traffic is 100Mb / s
>
> lspci: http://wklej.org/id/769102/
> /proc/cpuinfo: http://wklej.org/id/769104/
> /proc/interrupts: http://wklej.org/id/769106/
> ifconfig -a: http://wklej.org/id/769108/
> tc -s -d qdisc: http://wklej.org/id/769109/
> dmesg: here are some logs from iptables
> netstat -s: http://wklej.org/id/769110/
> lsmod: http://wklej.org/id/769117/
> /proc/net/softnet_stat: http://wklej.org/id/769116/
Same infos of 3.5-rcX kernel would be nice.
What NIC is eth0 ? (dmesg please)
It seems all network traffic on 2.6.38 is handled by a single cpu (cpu0)
(seen in /proc/interrupts)
I suspect that with 3.4 or 3.5 kernels, traffic is handled by many cpus
and they hit false sharing and contention.
You probably get better performance doing some affinity tuning :
For example,
eth0 serviced by cpu0
eth2 serviced by cpu1
eth3 serviced by cpu2
eth5 serviced by cpu3
and so on...
check and/or set /proc/irq/${NUM}/smp_affinity
^ permalink raw reply
* Re: [PATCH v2] fail dentry revalidation after namespace change
From: Eric W. Biederman @ 2012-07-06 9:51 UTC (permalink / raw)
To: Glauber Costa
Cc: linux-kernel, netdev, Andrew Morton, Greg Thelen, Serge Hallyn,
Tejun Heo, Greg Kroah-Hartman
In-Reply-To: <4FF6B37B.4040005@parallels.com>
Glauber Costa <glommer@parallels.com> writes:
> On 07/06/2012 01:37 PM, Eric W. Biederman wrote:
>> Glauber Costa <glommer@parallels.com> writes:
>>
>>> When we change the namespace tag of a sysfs entry, the associated dentry
>>> is still kept around. readdir() will work correctly and not display the
>>> old entries, but open() will still succeed, so will reads and writes.
>>
>> Note reads and writes of file handles open before the move should
>> continue to work.
>
> Well, yes. But do you see it as a big problem?
>
> This can probably be fixed as well, but I foresee a big hackishness in
> the way =p
At the moment it looks like a feature.
The only reason we bounce between different instances of sysfs is
because of the unfortunate sysfs directory layout that we need
to remain compatible with.
But I don't see it making much of a difference either way.
Eric
^ permalink raw reply
* Re: 3.4.1 and 3.5-rc1 Packet lost at 250Mb/s
From: Nieścierowicz Adam @ 2012-07-06 9:47 UTC (permalink / raw)
To: Eric Dumazet, Netdev
Hello,
Can I send something that will help determine the cause of the problem?
W dniu 08.06.2012 11:41, Eric Dumazet napisał(a):
> On Fri, 2012-06-08 at 10:58 +0200, Nieścierowicz Adam wrote:
>
>> Hello, recently we changed on the router kernel from 2.6.38.1 to
>> 3.4.1
>> and noticed 30% packet loss when traffic increases up to 250MB / s.
>> Similar is for kernel 3.5-rc1 Here a link to ifstat
>> http://wklej.org/id/767577/ [2]
>
> You should give as much as possible delails on your setup (hardware,
> software)
>
> lspci
> cat /proc/cpuinfo
> cat /proc/interrupts
> ifconfig -a
> tc -s -d qdisc
> dmesg
> netstat -s
currently running on 2.6.38.1 and traffic is 100Mb / s
lspci: http://wklej.org/id/769102/
/proc/cpuinfo: http://wklej.org/id/769104/
/proc/interrupts: http://wklej.org/id/769106/
ifconfig -a: http://wklej.org/id/769108/
tc -s -d qdisc: http://wklej.org/id/769109/
dmesg: here are some logs from iptables
netstat -s: http://wklej.org/id/769110/
lsmod: http://wklej.org/id/769117/
/proc/net/softnet_stat: http://wklej.org/id/769116/
^ permalink raw reply
* Re: [PATCH v2] fail dentry revalidation after namespace change
From: Glauber Costa @ 2012-07-06 9:44 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, netdev, Andrew Morton, Greg Thelen, Serge Hallyn,
Tejun Heo, Greg Kroah-Hartman
In-Reply-To: <87hatli62r.fsf@xmission.com>
On 07/06/2012 01:37 PM, Eric W. Biederman wrote:
> Glauber Costa <glommer@parallels.com> writes:
>
>> When we change the namespace tag of a sysfs entry, the associated dentry
>> is still kept around. readdir() will work correctly and not display the
>> old entries, but open() will still succeed, so will reads and writes.
>
> Note reads and writes of file handles open before the move should
> continue to work.
Well, yes. But do you see it as a big problem?
This can probably be fixed as well, but I foresee a big hackishness in
the way =p
^ permalink raw reply
* [PATCH net-next 2/2] xfrm: Initialize the struct xfrm_dst behind the dst_enty field
From: Steffen Klassert @ 2012-07-06 9:39 UTC (permalink / raw)
To: David Miller, Eric Dumazet; +Cc: netdev
In-Reply-To: <20120706093709.GH1869@secunet.com>
We currently start initializing the struct xfrm_dst at whichever field
happens to be first behind the struct dst_entry. This is error prone
because a new field added in front of it would be left uninitialized.
So start initializing the struct xfrm_dst right behind the dst_entry.
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/xfrm/xfrm_policy.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6e97855..79c498b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1353,8 +1353,9 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
xdst = dst_alloc(dst_ops, NULL, 0, 0, 0);
if (likely(xdst)) {
- memset(&xdst->u.rt6.rt6i_table, 0,
- sizeof(*xdst) - sizeof(struct dst_entry));
+ struct dst_entry *dst = (struct dst_entry *)xdst;
+
+ memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
xdst->flo.ops = &xfrm_bundle_fc_ops;
} else
xdst = ERR_PTR(-ENOBUFS);
--
1.7.0.4
^ permalink raw reply related
* Re: [RFC] Introduce to batch variants of accept() and epoll_ctl() syscall
From: Li Yu @ 2012-07-06 9:38 UTC (permalink / raw)
To: Eric Dumazet
Cc: Changli Gao, Linux Netdev List, Linux Kernel Mailing List,
davidel
In-Reply-To: <1339750318.7491.70.camel@edumazet-glaptop>
于 2012年06月15日 16:51, Eric Dumazet 写道:
> On Fri, 2012-06-15 at 13:37 +0800, Li Yu wrote:
>
>> Of course, I think that implementing them should not be a hard work :)
>>
>> Em. I really do not know whether it is necessary to introduce to a new
>> syscall here. An alternative solution to add new socket option to handle
>> such batch requirement, so applications also can detect if kernel has
>> this extended ability with a easy getsockopt() call.
>>
>> Any way, I am going to try to write a prototype first.
>
> Before that, could you post the result of "perf top", or "perf
> record ...;perf report"
>
Sorry, I only just found time to write a benchmark to reproduce this
problem on my test bed; below are the results of "perf record -g -C 0".
kernel is 3.4.0:
Events: 7K cycles
+ 54.87% swapper [kernel.kallsyms] [k] poll_idle
- 3.10% :22984 [kernel.kallsyms] [k] _raw_spin_lock
- _raw_spin_lock
- 64.62% sch_direct_xmit
dev_queue_xmit
ip_finish_output
ip_output
- ip_local_out
+ 49.48% ip_queue_xmit
+ 37.48% ip_build_and_send_pkt
+ 13.04% ip_send_skb
I can not reproduce complete same high CPU usage on my testing
environment, but top show that it has similar ratio of sys% and
si% on one CPU:
Tasks: 125 total, 2 running, 123 sleeping, 0 stopped, 0 zombie
Cpu0 : 1.0%us, 30.7%sy, 0.0%ni, 18.8%id, 0.0%wa, 0.0%hi, 49.5%si,
0.0%st
Well, it seem that I must acknowledge I was wrong here. however,
I recall that I indeed ever encountered this in another benchmarking a
small packets performance.
I guess, this is since TX softirq and syscall context contend same lock
in sch_direct_xmit(), is this right?
thanks
Yu
>> The top shows the kernel is most cpu hog, the testing is simple,
>> just a accept() -> epoll_ctl(ADD) loop, the ratio of cpu util sys% to
>> si% is about 2:5.
>
> This ratio is not meaningful, if we dont know where time is spent.
>
>
> I doubt epoll_ctl(ADD) is a problem here...
>
> If it is, batching the fds wont speed the thing anyway...
>
> I believe accept() is the problem here, because it contends with the
> softirq processing the tcp session handshake.
>
>
>
>
^ permalink raw reply
* [PATCH net-next 1/2] ipv6: Initialize the struct rt6_info behind the dst_enty field
From: Steffen Klassert @ 2012-07-06 9:37 UTC (permalink / raw)
To: David Miller, Eric Dumazet; +Cc: netdev
We currently start initializing the struct rt6_info at whichever field
happens to be first behind the struct dst_entry. This is error prone
because a new field added in front of it would be left uninitialized.
So start initializing the struct rt6_info right behind the dst_entry.
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv6/route.c | 11 ++++++-----
1 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6cc6c88..1d8459b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -273,8 +273,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
0, 0, flags);
if (rt) {
- memset(&rt->n, 0,
- sizeof(*rt) - sizeof(struct dst_entry));
+ struct dst_entry *dst = (struct dst_entry *)rt;
+
+ memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
}
return rt;
@@ -975,10 +976,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
if (rt) {
- memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
- rt6_init_peer(rt, net->ipv6.peers);
+ new = (struct dst_entry *)rt;
- new = &rt->dst;
+ memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+ rt6_init_peer(rt, net->ipv6.peers);
new->__use = 1;
new->input = dst_discard;
--
1.7.0.4
^ permalink raw reply related
* Re: [PATCH v2] fail dentry revalidation after namespace change
From: Eric W. Biederman @ 2012-07-06 9:37 UTC (permalink / raw)
To: Glauber Costa
Cc: linux-kernel, netdev, Andrew Morton, Greg Thelen, Serge Hallyn,
Tejun Heo, Greg Kroah-Hartman
In-Reply-To: <1341565747-15374-1-git-send-email-glommer@parallels.com>
Glauber Costa <glommer@parallels.com> writes:
> When we change the namespace tag of a sysfs entry, the associated dentry
> is still kept around. readdir() will work correctly and not display the
> old entries, but open() will still succeed, so will reads and writes.
Note reads and writes of file handles open before the move should
continue to work.
> This will no longer happen if sysfs is remounted, hinting that this is a
> cache-related problem.
>
> I am using the following sequence to demonstrate that:
>
> shell1:
> ip link add type veth
> unshare -nm
>
> shell2:
> ip link set veth1 <pid_of_shell_1>
> cat /sys/devices/virtual/net/veth1/ifindex
>
> Before that patch, this will succeed (fail to fail). After it, it will
> correctly return an error. Differently from a normal rename, which we
> handle fine, changing the object namespace will keep its path intact.
> So this check seems necessary as well.
>
> [ v2: get type from parent, as suggested by Eric Biederman ]
Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
> Signed-off-by: Glauber Costa <glommer@parallels.com>
> CC: Tejun Heo <tj@kernel.org>
> CC: Eric W. Biederman <ebiederm@xmission.com>
> CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> ---
> fs/sysfs/dir.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
> index e6bb9b2..c0bf38a 100644
> --- a/fs/sysfs/dir.c
> +++ b/fs/sysfs/dir.c
> @@ -307,6 +307,7 @@ static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
> {
> struct sysfs_dirent *sd;
> int is_dir;
> + int type;
>
> if (nd->flags & LOOKUP_RCU)
> return -ECHILD;
> @@ -326,6 +327,13 @@ static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
> if (strcmp(dentry->d_name.name, sd->s_name) != 0)
> goto out_bad;
>
> + /* The sysfs dirent has been moved to a different namespace */
> + type = KOBJ_NS_TYPE_NONE;
> + if (sd->s_parent)
> + type = sysfs_ns_type(sd->s_parent);
> + if (type && (sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns))
> + goto out_bad;
> +
> mutex_unlock(&sysfs_mutex);
> out_valid:
> return 1;
^ permalink raw reply
* Re: [net-next RFC V5 5/5] virtio_net: support negotiating the number of queues through ctrl vq
From: Jason Wang @ 2012-07-06 9:26 UTC (permalink / raw)
To: Stephen Hemminger
Cc: krkumar2, habanero, kvm, mst, netdev, mashirle, linux-kernel,
virtualization, edumazet, Sasha Levin, jwhan, sri, davem, tahm
In-Reply-To: <20120705233816.3ec0b827@nehalam.linuxnetplumber.net>
On 07/06/2012 02:38 PM, Stephen Hemminger wrote:
> On Fri, 06 Jul 2012 11:20:06 +0800
> Jason Wang<jasowang@redhat.com> wrote:
>
>> On 07/05/2012 08:51 PM, Sasha Levin wrote:
>>> On Thu, 2012-07-05 at 18:29 +0800, Jason Wang wrote:
>>>> @@ -1387,6 +1404,10 @@ static int virtnet_probe(struct virtio_device *vdev)
>>>> if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
>>>> vi->has_cvq = true;
>>>>
>>>> + /* Use single tx/rx queue pair as default */
>>>> + vi->num_queue_pairs = 1;
>>>> + vi->total_queue_pairs = num_queue_pairs;
>>> The code is using this "default" even if the amount of queue pairs it
>>> wants was specified during initialization. This basically limits any
>>> device to use 1 pair when starting up.
>>>
>> Yes, currently the virtio-net driver would use 1 txq/rxq by default
>> since multiqueue may not outperform in all kinds of workload. So it's
>> better to keep it as default and let user enable multiqueue by ethtool -L.
>>
> I would prefer that the driver sized number of queues based on number
> of online CPU's. That is what real hardware does. What kind of workload
> are you doing? If it is some DBMS benchmark then maybe the issue is that
> some CPU's need to be reserved.
I ran the rr and stream tests of netperf; multiqueue shows an improvement on
the rr test and a regression on small-packet transmission in the stream test.
For small-packet transmission, multiqueue tends to send many more small
packets, which also increases the CPU utilization. I suspect multiqueue is
faster and TCP does not merge big enough packets to send, but this may need
more thought.
^ permalink raw reply
* Re: BUG: unable to handle kernel paging request at 00000000d8be176d
From: Fengguang Wu @ 2012-07-06 8:34 UTC (permalink / raw)
To: David Miller; +Cc: eric.dumazet, netdev, steffen.klassert
In-Reply-To: <20120706.012921.1688197243231281609.davem@davemloft.net>
On Fri, Jul 06, 2012 at 01:29:21AM -0700, David Miller wrote:
> From: Fengguang Wu <wfg@linux.intel.com>
> Date: Fri, 6 Jul 2012 15:52:45 +0800
>
> > On Fri, Jul 06, 2012 at 03:37:45PM +0800, Fengguang Wu wrote:
> >> Hi David,
> >>
> >> > Fengguang Wu, can I ask you politely not to quote the guilty patch in
> >> > its entirety when reporting bugs? That screws up my workflow because
> >> > that patch then gets installed as a new patch in patchwork and I
> >> > have to therefore tick it off every time you report a bug.
> >>
> >> Sorry for that! Is it fine to _attach_ the referenced patch, or just
> >> a raw diff? Or, the commit SHA and subject are all you want to see?
> >
> > I used git-format-patch which makes a formal patch. How about git-show?
> > The output will be less like a formal patch, for example:
>
> No patch, in any format.
>
> It's completely pointless to attach the diff, anyone can use the
> commit log message and SHA ID to fetch the patch if they want.
OK!
> Its redundancy therefore also makes it a huge waste of bandwidth. I
> have no idea why you provide it in the first place.
I find it very convenient for myself when confirming the error/warning.
Thanks,
Fengguang
^ permalink raw reply
* Re: [net-next:master 294/295] drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:287:3: error: implicit declaration of function 'phy_init_eee'
From: David Miller @ 2012-07-06 8:30 UTC (permalink / raw)
To: wfg; +Cc: peppe.cavallaro, netdev
In-Reply-To: <20120706081556.GA28281@localhost>
From: Fengguang Wu <wfg@linux.intel.com>
Date: Fri, 6 Jul 2012 16:15:56 +0800
> I'm not sure if net-next tree is rebase-able.
Never was, never will be.
Please start to tone down your reports, many are overly verbose and
largely in the end not useful.
^ permalink raw reply
* Re: BUG: unable to handle kernel paging request at 00000000d8be176d
From: David Miller @ 2012-07-06 8:29 UTC (permalink / raw)
To: wfg; +Cc: eric.dumazet, netdev, steffen.klassert
In-Reply-To: <20120706075245.GA28521@localhost>
From: Fengguang Wu <wfg@linux.intel.com>
Date: Fri, 6 Jul 2012 15:52:45 +0800
> On Fri, Jul 06, 2012 at 03:37:45PM +0800, Fengguang Wu wrote:
>> Hi David,
>>
>> > Fengguang Wu, can I ask you politely not to quote the guilty patch in
>> > its entirety when reporting bugs? That screws up my workflow because
>> > that patch then gets installed as a new patch in patchwork and I
>> > have to therefore tick it off every time you report a bug.
>>
>> Sorry for that! Is it fine to _attach_ the referenced patch, or just
>> a raw diff? Or, the commit SHA and subject are all you want to see?
>
> I used git-format-patch which makes a formal patch. How about git-show?
> The output will be less like a formal patch, for example:
No patch, in any format.
It's completely pointless to attach the diff, anyone can use the
commit log message and SHA ID to fetch the patch if they want.
Its redundancy therefore also makes it a huge waste of bandwidth. I
have no idea why you provide it in the first place.
^ permalink raw reply
* Re: BUG: unable to handle kernel paging request at 00000000d8be176d
From: David Miller @ 2012-07-06 8:26 UTC (permalink / raw)
To: wfg; +Cc: eric.dumazet, netdev, steffen.klassert
In-Reply-To: <20120706073745.GA28197@localhost>
From: Fengguang Wu <wfg@linux.intel.com>
Date: Fri, 6 Jul 2012 15:37:45 +0800
> Sorry for that! Is it fine to _attach_ the referenced patch, or just
> a raw diff? Or, the commit SHA and subject are all you want to see?
Don't provide the diff at all, in any form. Even if you attach it,
patchwork can still parse it and queue it up.
^ permalink raw reply
* Re: [PATCH net-next v2 0/4] 6lowpan: Various bug fixes
From: Alexander Smirnov @ 2012-07-06 9:17 UTC (permalink / raw)
To: Tony Cheneau; +Cc: David S. Miller, netdev
In-Reply-To: <1341550225-13112-1-git-send-email-tony.cheneau@amnesiak.org>
> After reading and playing with the 6lowpan code, I found out a few issues. This
> patchset fixes them. This patchset should apply cleanly against the current
> net-next. It contains only bug fixes; I'll send another patchset later on that
> will contain new functionality.
>
> Alexander commented on the previous version of this patchset and made me
> understand that the commit messages were not specific enough. This new version
> hopefully improves that.
>
> This is a set of 4 small patches that correct bugs in 6lowpan:
> - patch 1 fixes a potential crash when reassembling UDP fragments
> - patch 2 fixes a type length issue that prevents fragment reassembly
> from operating properly.
> - patches 3 and 4 correct field encoding issues (byte order was not right)
>
> Hope it helps.
Looks good.
Just one note. Please add the project name into the header of your
patches, something like:
6lowpan: blablabla
^ permalink raw reply
* Re: [PATCH net-next] ipv6: Initialize the neighbour pointer of rt6_info on allocation
From: Steffen Klassert @ 2012-07-06 9:16 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <20120706065426.GF1869@secunet.com>
On Fri, Jul 06, 2012 at 08:54:26AM +0200, Steffen Klassert wrote:
> On Thu, Jul 05, 2012 at 05:16:44PM +0200, Eric Dumazet wrote:
> >
> > Hmm, could we find a way to avoid this for future changes ?
> >
> > We know dst_entry is the first field, so maybe :
> >
> > if (rt) {
> > struct dst_entry *dst = (struct dst_entry *)rt;
> >
> > memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
> >
>
> Yes, I think we need to do something like this.
>
> I've just noticed that ip6_blackhole_route, xfrm_alloc_dst,
> dn_route_output_slow and dn_route_input_slow have the same issue.
Actually, decnet is ok. So I'll send patches with the change you
suggested for ipv6 and xfrm.
^ permalink raw reply
* Re: [net-next:master 294/295] drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:287:3: error: implicit declaration of function 'phy_init_eee'
From: Fengguang Wu @ 2012-07-06 9:10 UTC (permalink / raw)
To: David Miller; +Cc: peppe.cavallaro, netdev
In-Reply-To: <20120706.013055.1908685269314070614.davem@davemloft.net>
Hi David,
> > I'm not sure if net-next tree is rebase-able.
>
> Never was, never will be.
>
> Please start to tone down your reports, many are overly verbose and
> largely in the end not useful.
It took me a while to get to know your working style and preferences.
I'm aiming for the best quality service and will try to feed you with
the information that matters.
Thanks,
Fengguang
^ permalink raw reply
* [PATCH v2] fail dentry revalidation after namespace change
From: Glauber Costa @ 2012-07-06 9:09 UTC (permalink / raw)
To: linux-kernel
Cc: netdev, Andrew Morton, Greg Thelen, Serge Hallyn, Glauber Costa,
Tejun Heo, Eric W. Biederman, Greg Kroah-Hartman
When we change the namespace tag of a sysfs entry, the associated dentry
is still kept around. readdir() will work correctly and not display the
old entries, but open() will still succeed, so will reads and writes.
This will no longer happen if sysfs is remounted, hinting that this is a
cache-related problem.
I am using the following sequence to demonstrate that:
shell1:
ip link add type veth
unshare -nm
shell2:
ip link set veth1 <pid_of_shell_1>
cat /sys/devices/virtual/net/veth1/ifindex
Before that patch, this will succeed (fail to fail). After it, it will
correctly return an error. Differently from a normal rename, which we
handle fine, changing the object namespace will keep its path intact.
So this check seems necessary as well.
[ v2: get type from parent, as suggested by Eric Biederman ]
Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: Tejun Heo <tj@kernel.org>
CC: Eric W. Biederman <ebiederm@xmission.com>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/sysfs/dir.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e6bb9b2..c0bf38a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -307,6 +307,7 @@ static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct sysfs_dirent *sd;
int is_dir;
+ int type;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
@@ -326,6 +327,13 @@ static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
if (strcmp(dentry->d_name.name, sd->s_name) != 0)
goto out_bad;
+ /* The sysfs dirent has been moved to a different namespace */
+ type = KOBJ_NS_TYPE_NONE;
+ if (sd->s_parent)
+ type = sysfs_ns_type(sd->s_parent);
+ if (type && (sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns))
+ goto out_bad;
+
mutex_unlock(&sysfs_mutex);
out_valid:
return 1;
--
1.7.10.4
^ permalink raw reply related
* Re: [PATCH] force dentry revalidation after namespace change
From: Glauber Costa @ 2012-07-06 9:00 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-kernel, netdev, Andrew Morton, Tejun Heo,
Greg Kroah-Hartman
In-Reply-To: <8762a1vl76.fsf@xmission.com>
On 07/06/2012 03:31 AM, Eric W. Biederman wrote:
> The important difference there is that the type comes from the directory
> that the dirent is in, not from the dirent itself.
>
>> > /* The sysfs dirent has been deleted */
>> > if (sd->s_flags & SYSFS_FLAG_REMOVED)
>> > goto out_bad;
> Glauber, do you think you can fix your patch and resubmit?
>
> Eric
Yes. In a quick test it seems to work. I'll resubmit shortly.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox