* [PATCH 1/2] IPVS: take care of return value from protocol init_netns @ 2012-04-16 11:39 Hans Schillstrom 2012-04-16 11:39 ` [PATCH 2/2] IPVS: make failure of netns init more stable Hans Schillstrom 0 siblings, 1 reply; 8+ messages in thread From: Hans Schillstrom @ 2012-04-16 11:39 UTC (permalink / raw) To: horms, ja, wensong, lvs-devel, netdev, netfilter-devel Cc: hans, Hans Schillstrom ip_vs_create_timeout_table() can return NULL All functions protocol init_netns is affected of this patch. Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com> --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_proto.c | 2 +- net/netfilter/ipvs/ip_vs_proto_sctp.c | 5 ++++- net/netfilter/ipvs/ip_vs_proto_tcp.c | 5 ++++- net/netfilter/ipvs/ip_vs_proto_udp.c | 5 ++++- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a903a82..04e2211 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -393,7 +393,7 @@ struct ip_vs_protocol { void (*exit)(struct ip_vs_protocol *pp); - void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd); + int (*init_netns)(struct net *net, struct ip_vs_proto_data *pd); void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 643ff67..c4d73c2 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -79,7 +79,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) atomic_set(&pd->appcnt, 0); /* Init app counter */ if (pp->init_netns != NULL) - pp->init_netns(net, pd); + return pp->init_netns(net, pd); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1fbf7a2..9f3fb75 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1090,7 +1090,7 @@ out: * timeouts is netns related now. * --------------------------------------------- */ -static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1098,6 +1098,9 @@ static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; + return 0; } static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index ef8641f..cd609cc 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -677,7 +677,7 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) * timeouts is netns related now. * --------------------------------------------- */ -static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -685,7 +685,10 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; pd->tcp_state_table = tcp_states; + return 0; } static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index f4b7262..2fedb2d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -467,7 +467,7 @@ udp_state_transition(struct ip_vs_conn *cp, int direction, cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; } -static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -475,6 +475,9 @@ static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->udp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); + if (!pd->timeout_table) + return -ENOMEM; + return 0; } static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) -- 1.7.2.3 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] IPVS: make failure of netns init more stable 2012-04-16 11:39 [PATCH 1/2] IPVS: take care of return value from protocol init_netns Hans Schillstrom @ 2012-04-16 11:39 ` Hans Schillstrom 2012-04-16 14:25 ` Julian Anastasov 0 siblings, 1 reply; 8+ messages in thread From: Hans Schillstrom @ 2012-04-16 11:39 UTC (permalink / raw) To: horms, ja, wensong, lvs-devel, netdev, netfilter-devel Cc: hans, Hans Schillstrom Add a IP_VS_F_NET_INIT_OK flag that other modules can use for check of a successful init of ip_vs Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com> --- include/net/ip_vs.h | 20 +++++++++++++++++++- net/netfilter/ipvs/ip_vs_conn.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 15 +++++++++------ net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/netfilter/ipvs/ip_vs_ftp.c | 2 ++ net/netfilter/ipvs/ip_vs_lblc.c | 3 +++ net/netfilter/ipvs/ip_vs_lblcr.c | 3 +++ 7 files changed, 38 insertions(+), 9 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 04e2211..96add20 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -787,7 +787,8 @@ struct ip_vs_app { /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ - int enable; /* enable like nf_hooks do */ + /* Status flags: enable like nf_hooks, and netns init ok */ + int status; /* * Hash table: for real service lookups */ @@ -909,6 +910,23 @@ struct netns_ipvs { struct net *net; /* Needed by timer routines */ }; +/* + * Status flags for each netns + */ +enum { + IP_VS_ENABLE, /* Enabled ipvs i.e. service registered */ + IP_VS_NET_INIT_OK, /* Netns init is OK */ + IP_VS_F_ENABLE = (1 << IP_VS_ENABLE), + IP_VS_F_NET_INIT_OK = (1 << IP_VS_NET_INIT_OK) +}; + +#define IS_IPVS_ENABLED(ipvs) (ipvs->status & IP_VS_F_ENABLE) +#define IPVS_ENABLE(ipvs) (ipvs->status |= IP_VS_F_ENABLE) +#define IPVS_DISABLE(ipvs) (ipvs->status &= ~IP_VS_F_ENABLE) + +#define IS_IPVS_NETNS_OK(ipvs) (ipvs->status & IP_VS_F_NET_INIT_OK) +#define IPVS_NETNS_OK(ipvs) (ipvs->status |= IP_VS_F_NET_INIT_OK) + #define DEFAULT_SYNC_THRESHOLD 3 #define DEFAULT_SYNC_PERIOD 50 #define DEFAULT_SYNC_VER 1 diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 4a09b78..4f2a7ec 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -783,7 +783,7 @@ static void ip_vs_conn_expire(unsigned long data) * conntrack cleanup for the net. */ smp_rmb(); - if (ipvs->enable) + if (IS_IPVS_ENABLED(ipvs)) ip_vs_conn_drop_conntrack(cp); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b5a5c73..58722a2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1112,7 +1112,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) return NF_ACCEPT; net = skb_net(skb); - if (!net_ipvs(net)->enable) + if (!IS_IPVS_ENABLED(net_ipvs(net))) return NF_ACCEPT; ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); @@ -1513,7 +1513,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + if (!IS_IPVS_ENABLED(net_ipvs(net))) return NF_ACCEPT; ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); @@ -1735,7 +1735,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + if (!IS_IPVS_ENABLED(net_ipvs(net))) return NF_ACCEPT; return ip_vs_in_icmp(skb, &r, hooknum); @@ -1755,7 +1755,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + if (!IS_IPVS_ENABLED(net_ipvs(net))) return NF_ACCEPT; return ip_vs_in_icmp_v6(skb, &r, hooknum); @@ -1881,7 +1881,7 @@ static int __net_init __ip_vs_init(struct net *net) return -ENOMEM; /* Hold the beast until a service is registerd */ - ipvs->enable = 0; + ipvs->status = 0; ipvs->net = net; /* Counters used for creating unique names */ ipvs->gen = atomic_read(&ipvs_netns_cnt); @@ -1906,6 +1906,7 @@ static int __net_init __ip_vs_init(struct net *net) if (ip_vs_sync_net_init(net) < 0) goto sync_fail; + IPVS_NETNS_OK(ipvs); printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", sizeof(struct netns_ipvs), ipvs->gen); return 0; @@ -1924,6 +1925,7 @@ protocol_fail: control_fail: ip_vs_estimator_net_cleanup(net); estimator_fail: + ipvs->status = 0; /* Nothing is enabled */ return -ENOMEM; } @@ -1936,12 +1938,13 @@ static void __net_exit __ip_vs_cleanup(struct net *net) ip_vs_control_net_cleanup(net); ip_vs_estimator_net_cleanup(net); IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); + net->ipvs = NULL; } static void __net_exit __ip_vs_dev_cleanup(struct net *net) { EnterFunction(2); - net_ipvs(net)->enable = 0; /* Disable packet reception */ + IPVS_DISABLE(net_ipvs(net)); /* Disable packet reception */ smp_wmb(); ip_vs_sync_net_cleanup(net); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b8d0df7..5392de9 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1221,7 +1221,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, *svc_p = svc; /* Now there is a service - full throttle */ - ipvs->enable = 1; + IPVS_ENABLE(ipvs); return 0; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index debb8c7..6bc2420 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -439,6 +439,8 @@ static int __net_init __ip_vs_ftp_init(struct net *net) struct ip_vs_app *app; struct netns_ipvs *ipvs = net_ipvs(net); + if (!IS_IPVS_NETNS_OK(ipvs)) + return -EUNATCH; app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL); if (!app) return -ENOMEM; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 27c24f1..f158f0c 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -551,6 +551,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + if (!IS_IPVS_NETNS_OK(ipvs)) + return -EUNATCH; + if (!net_eq(net, &init_net)) { ipvs->lblc_ctl_table = kmemdup(vs_vars_table, sizeof(vs_vars_table), diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 7498756..aeecda4 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -745,6 +745,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + if (!IS_IPVS_NETNS_OK(ipvs)) + return -EUNATCH; + if (!net_eq(net, &init_net)) { ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, sizeof(vs_vars_table), -- 1.7.2.3 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] IPVS: make failure of netns init more stable 2012-04-16 11:39 ` [PATCH 2/2] IPVS: make failure of netns init more stable Hans Schillstrom @ 2012-04-16 14:25 ` Julian Anastasov 2012-04-16 17:16 ` Hans Schillstrom 2012-04-17 11:15 ` Hans Schillstrom 0 siblings, 2 replies; 8+ messages in thread From: Julian Anastasov @ 2012-04-16 14:25 UTC (permalink / raw) To: Hans Schillstrom; +Cc: horms, wensong, lvs-devel, netdev, netfilter-devel, hans Hello, On Mon, 16 Apr 2012, Hans Schillstrom wrote: > Add a IP_VS_F_NET_INIT_OK flag > that other modules can use for check of a successful init of ip_vs > > Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com> > --- > diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c > index b5a5c73..58722a2 100644 > --- a/net/netfilter/ipvs/ip_vs_core.c > +++ b/net/netfilter/ipvs/ip_vs_core.c > @@ -1881,7 +1881,7 @@ static int __net_init __ip_vs_init(struct net *net) > return -ENOMEM; > > /* Hold the beast until a service is registerd */ > - ipvs->enable = 0; > + ipvs->status = 0; > ipvs->net = net; > /* Counters used for creating unique names */ > ipvs->gen = atomic_read(&ipvs_netns_cnt); > @@ -1906,6 +1906,7 @@ static int __net_init __ip_vs_init(struct net *net) > if (ip_vs_sync_net_init(net) < 0) > goto sync_fail; > > + IPVS_NETNS_OK(ipvs); > printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", > sizeof(struct netns_ipvs), ipvs->gen); > return 0; > @@ -1924,6 +1925,7 @@ protocol_fail: > control_fail: > ip_vs_estimator_net_cleanup(net); > estimator_fail: > + ipvs->status = 0; /* Nothing is enabled */ While 1st patch looks ok, keeping net->ipvs after failure is not going to work. It seems you ignored the patch I already posted. We duplicate the pointer in net->ipvs but it should not be used after free. Note that net_generic() and net->ipvs can not be used after ops_exit/ops_free and failed ops_init. But I see some inconsistency in net/core/net_namespace.c: __register_pernet_operations when CONFIG_NET_NS is enabled does not call ops_free after failed ops_init while when CONFIG_NET_NS is not enabled ops_free is called. The problem is that we leak the ops->size data allocated for the failed net. I think, the fix should be ops_init to free the data. The 2nd problem is that after ops_free the net_generic pointer remains assigned in ptr[id-1], even after being freed. Not sure if recent changes to net_generic() can deal with freed data. This BUG_ON(!ptr) can not catch problems if one subsys uses net_generic() for another subsys that can be unregistered. I'm going to test such fix for ops_init: [PATCH] netns: do not leak net_generic data on failed init ops_init should free the net_generic data on init failure and __register_pernet_operations should not call ops_free when NET_NS is not enabled. Signed-off-by: Julian Anastasov <ja@ssi.bg> --- net/core/net_namespace.c | 33 ++++++++++++++++++--------------- 1 files changed, 18 insertions(+), 15 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0e950fd..31a5ae5 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -83,21 +83,29 @@ assign: static int ops_init(const struct pernet_operations *ops, struct net *net) { - int err; + int err = -ENOMEM; + void *data = NULL; + if (ops->id && ops->size) { - void *data = kzalloc(ops->size, GFP_KERNEL); + data = kzalloc(ops->size, GFP_KERNEL); if (!data) - return -ENOMEM; + goto out; err = net_assign_generic(net, *ops->id, data); - if (err) { - kfree(data); - return err; - } + if (err) + goto cleanup; } + err = 0; if (ops->init) - return ops->init(net); - return 0; + err = ops->init(net); + if (!err) + return 0; + +cleanup: + kfree(data); + +out: + return err; } static void ops_free(const struct pernet_operations *ops, struct net *net) @@ -448,12 +456,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { - int err = 0; - err = ops_init(ops, &init_net); - if (err) - ops_free(ops, &init_net); - return err; - + return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) -- 1.7.3.4 > diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c > index debb8c7..6bc2420 100644 > --- a/net/netfilter/ipvs/ip_vs_ftp.c > +++ b/net/netfilter/ipvs/ip_vs_ftp.c > @@ -439,6 +439,8 @@ static int __net_init __ip_vs_ftp_init(struct net *net) > struct ip_vs_app *app; > struct netns_ipvs *ipvs = net_ipvs(net); > > + if (!IS_IPVS_NETNS_OK(ipvs)) > + return -EUNATCH; I already provided patch for ip_vs_ftp. > app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL); > if (!app) > return -ENOMEM; > diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c > index 27c24f1..f158f0c 100644 > --- a/net/netfilter/ipvs/ip_vs_lblc.c > +++ b/net/netfilter/ipvs/ip_vs_lblc.c > @@ -551,6 +551,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net) > { > struct netns_ipvs *ipvs = net_ipvs(net); > > + if (!IS_IPVS_NETNS_OK(ipvs)) > + return -EUNATCH; > + As for LBLC* they need just net->ipvs != NULL check because net_generic can not be trusted for other subsys. > if (!net_eq(net, &init_net)) { > ipvs->lblc_ctl_table = kmemdup(vs_vars_table, > sizeof(vs_vars_table), > diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c > index 7498756..aeecda4 100644 > --- a/net/netfilter/ipvs/ip_vs_lblcr.c > +++ b/net/netfilter/ipvs/ip_vs_lblcr.c > @@ -745,6 +745,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) > { > struct netns_ipvs *ipvs = net_ipvs(net); > > + if (!IS_IPVS_NETNS_OK(ipvs)) > + return -EUNATCH; > + > if (!net_eq(net, &init_net)) { > ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, > sizeof(vs_vars_table), > -- > 1.7.2.3 Regards -- Julian Anastasov <ja@ssi.bg> ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] IPVS: make failure of netns init more stable 2012-04-16 14:25 ` Julian Anastasov @ 2012-04-16 17:16 ` Hans Schillstrom 2012-04-16 17:31 ` Julian Anastasov 2012-04-17 11:15 ` Hans Schillstrom 1 sibling, 1 reply; 8+ messages in thread From: Hans Schillstrom @ 2012-04-16 17:16 UTC (permalink / raw) To: Julian Anastasov Cc: horms@verge.net.au, wensong@linux-vs.org, lvs-devel@vger.kernel.org, netdev@vger.kernel.org, netfilter-devel@vger.kernel.org, hans@schillstrom.com Hello On Monday 16 April 2012 16:25:23 Julian Anastasov wrote: > > Hello, > > On Mon, 16 Apr 2012, Hans Schillstrom wrote: > > > Add a IP_VS_F_NET_INIT_OK flag > > that other modules can use for check of a successful init of ip_vs > > > > Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com> > > --- > > > diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c > > index b5a5c73..58722a2 100644 > > --- a/net/netfilter/ipvs/ip_vs_core.c > > +++ b/net/netfilter/ipvs/ip_vs_core.c > > > @@ -1881,7 +1881,7 @@ static int __net_init __ip_vs_init(struct net *net) > > return -ENOMEM; > > > > /* Hold the beast until a service is registerd */ > > - ipvs->enable = 0; > > + ipvs->status = 0; > > ipvs->net = net; > > /* Counters used for creating unique names */ > > ipvs->gen = atomic_read(&ipvs_netns_cnt); > > @@ -1906,6 +1906,7 @@ static int __net_init __ip_vs_init(struct net *net) > > if (ip_vs_sync_net_init(net) < 0) > > goto sync_fail; > > > > + IPVS_NETNS_OK(ipvs); > > printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", > > sizeof(struct netns_ipvs), ipvs->gen); > > return 0; > > @@ -1924,6 +1925,7 @@ protocol_fail: > > control_fail: > > ip_vs_estimator_net_cleanup(net); > > estimator_fail: > > + ipvs->status = 0; /* Nothing is enabled */ > > While 1st patch looks ok, keeping net->ipvs after > failure is not going to work. It seems you ignored the patch > I already posted. We duplicate the pointer in net->ipvs but > it should not be used after free. > Sorry, I'll revert that. > Note that net_generic() and net->ipvs can not be > used after ops_exit/ops_free and failed ops_init. True, when ip_vs(.ko) fails. I missed that. > > But I see some inconsistency in net/core/net_namespace.c: > __register_pernet_operations when CONFIG_NET_NS is enabled > does not call ops_free after failed ops_init while when > CONFIG_NET_NS is not enabled ops_free is called. The > problem is that we leak the ops->size data allocated for the > failed net. I think, the fix should be ops_init to free the data. Are you sure ? In my code it does... static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) at line 417 .. for_each_net(net) { error = ops_init(ops, net); if (error) goto out_undo; ... line 426 out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); ops_exit_list(ops, &net_exit_list); ops_free_list(ops, &net_exit_list); return error; } > The 2nd problem is that after ops_free the > net_generic pointer remains assigned in ptr[id-1], even > after being freed. Not sure if recent changes to net_generic() > can deal with freed data. This BUG_ON(!ptr) can not catch > problems if one subsys uses net_generic() for another > subsys that can be unregistered. > > I'm going to test such fix for ops_init: > > [PATCH] netns: do not leak net_generic data on failed init > > ops_init should free the net_generic data on > init failure and __register_pernet_operations should not > call ops_free when NET_NS is not enabled. > > Signed-off-by: Julian Anastasov <ja@ssi.bg> > --- > net/core/net_namespace.c | 33 ++++++++++++++++++--------------- > 1 files changed, 18 insertions(+), 15 deletions(-) > > diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c > index 0e950fd..31a5ae5 100644 > --- a/net/core/net_namespace.c > +++ b/net/core/net_namespace.c > @@ -83,21 +83,29 @@ assign: > > static int ops_init(const struct pernet_operations *ops, struct net *net) > { > - int err; > + int err = -ENOMEM; > + void *data = NULL; > + > if (ops->id && ops->size) { > - void *data = kzalloc(ops->size, GFP_KERNEL); > + data = kzalloc(ops->size, GFP_KERNEL); > if (!data) > - return -ENOMEM; > + goto out; > > err = net_assign_generic(net, *ops->id, data); > - if (err) { > - kfree(data); > - return err; > - } > + if (err) > + goto cleanup; > } > + err = 0; > if (ops->init) > - return ops->init(net); > - return 0; > + err = ops->init(net); > + if (!err) > + return 0; > + > +cleanup: > + kfree(data); > + > +out: > + return err; > } > > static void ops_free(const struct pernet_operations *ops, struct net *net) > @@ -448,12 +456,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) > static int __register_pernet_operations(struct list_head *list, > struct pernet_operations *ops) > { > - int err = 0; > - err = ops_init(ops, &init_net); > - if (err) > - ops_free(ops, &init_net); > - return err; > - > + return ops_init(ops, &init_net); > } > > static void __unregister_pernet_operations(struct pernet_operations *ops) -- Regards Hans Schillstrom <hans.schillstrom@ericsson.com> ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] IPVS: make failure of netns init more stable 2012-04-16 17:16 ` Hans Schillstrom @ 2012-04-16 17:31 ` Julian Anastasov 2012-04-16 17:45 ` Hans Schillstrom 0 siblings, 1 reply; 8+ messages in thread From: Julian Anastasov @ 2012-04-16 17:31 UTC (permalink / raw) To: Hans Schillstrom Cc: horms@verge.net.au, wensong@linux-vs.org, lvs-devel@vger.kernel.org, netdev@vger.kernel.org, netfilter-devel@vger.kernel.org, hans@schillstrom.com Hello, On Mon, 16 Apr 2012, Hans Schillstrom wrote: > > But I see some inconsistency in net/core/net_namespace.c: > > __register_pernet_operations when CONFIG_NET_NS is enabled > > does not call ops_free after failed ops_init while when > > CONFIG_NET_NS is not enabled ops_free is called. The > > problem is that we leak the ops->size data allocated for the > > failed net. I think, the fix should be ops_init to free the data. > > Are you sure ? > In my code it does... > > static int __register_pernet_operations(struct list_head *list, > struct pernet_operations *ops) > at line 417 > .. > for_each_net(net) { > error = ops_init(ops, net); > if (error) > goto out_undo; There is line here that registers current net for cleanup only after ops_init success: list_add_tail(&net->exit_list, &net_exit_list); If ops_init fails for first net then net_exit_list will be empty. > ... > line 426 > out_undo: > /* If I have an error cleanup all namespaces I initialized */ > list_del(&ops->list); > ops_exit_list(ops, &net_exit_list); > ops_free_list(ops, &net_exit_list); > return error; > } Regards -- Julian Anastasov <ja@ssi.bg> ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] IPVS: make failure of netns init more stable 2012-04-16 17:31 ` Julian Anastasov @ 2012-04-16 17:45 ` Hans Schillstrom 0 siblings, 0 replies; 8+ messages in thread From: Hans Schillstrom @ 2012-04-16 17:45 UTC (permalink / raw) To: Julian Anastasov Cc: horms@verge.net.au, wensong@linux-vs.org, lvs-devel@vger.kernel.org, netdev@vger.kernel.org, netfilter-devel@vger.kernel.org, hans@schillstrom.com On Monday 16 April 2012 19:31:40 Julian Anastasov wrote: > > Hello, > > On Mon, 16 Apr 2012, Hans Schillstrom wrote: > > > > But I see some inconsistency in net/core/net_namespace.c: > > > __register_pernet_operations when CONFIG_NET_NS is enabled > > > does not call ops_free after failed ops_init while when > > > CONFIG_NET_NS is not enabled ops_free is called. The > > > problem is that we leak the ops->size data allocated for the > > > failed net. I think, the fix should be ops_init to free the data. > > > > Are you sure ? > > In my code it does... > > > > static int __register_pernet_operations(struct list_head *list, > > struct pernet_operations *ops) > > at line 417 > > .. > > for_each_net(net) { > > error = ops_init(ops, net); > > if (error) > > goto out_undo; > > There is line here that registers current net for > cleanup only after ops_init success: > > list_add_tail(&net->exit_list, &net_exit_list); > > If ops_init fails for first net then net_exit_list will > be empty. Yes, you are right as allways :-) > > > ... > > line 426 > > out_undo: > > /* If I have an error cleanup all namespaces I initialized */ > > list_del(&ops->list); > > ops_exit_list(ops, &net_exit_list); > > ops_free_list(ops, &net_exit_list); > > return error; > > } > > Regards > > -- > Julian Anastasov <ja@ssi.bg> > -- Regards Hans Schillstrom <hans.schillstrom@ericsson.com> ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] IPVS: make failure of netns init more stable 2012-04-16 14:25 ` Julian Anastasov 2012-04-16 17:16 ` Hans Schillstrom @ 2012-04-17 11:15 ` Hans Schillstrom 2012-04-17 20:57 ` Julian Anastasov 1 sibling, 1 reply; 8+ messages in thread From: Hans Schillstrom @ 2012-04-17 11:15 UTC (permalink / raw) To: Julian Anastasov Cc: horms@verge.net.au, wensong@linux-vs.org, lvs-devel@vger.kernel.org, netdev@vger.kernel.org, netfilter-devel@vger.kernel.org, hans@schillstrom.com Hello Julian On Monday 16 April 2012 16:25:23 Julian Anastasov wrote: > > Hello, [snip] > > Note that net_generic() and net->ipvs can not be > used after ops_exit/ops_free and failed ops_init. > I wonder if we are chasing ghosts... With proper fault handling I can't even see a case when it (net->ipvs) can be used. Can you see a case when it could happen? Still we can set it to NULL on error exit and cleanup as you suggested, that doesn't harm I think. A. If you add a netns and it fails the entire ns will be rolled back, and no access to that ns can occur. That ns does not exist B. If you insert ip_vs.ko when having one or more name spaces and __ip_vs_init() returns an error the module will be unloaded. All ready loaded ns will not be affected. C. insmod of ex. ip_vs_ftp only affects loaded name spaces and if the load of ip_vs_ftp fails it will be unloaded without affecting ip_vs(.ko) (If ip_vs.ko is not loaded then it has to be loaded first case B...) With a "compiled in" ip_vs case B doesn't exist. With proper fault handling i.e. all ways returning fault codes to the netns init, there is no need for checking for "if (!net->ipvs)" or any other action. -- Regards Hans Schillstrom <hans.schillstrom@ericsson.com> ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] IPVS: make failure of netns init more stable 2012-04-17 11:15 ` Hans Schillstrom @ 2012-04-17 20:57 ` Julian Anastasov 0 siblings, 0 replies; 8+ messages in thread From: Julian Anastasov @ 2012-04-17 20:57 UTC (permalink / raw) To: Hans Schillstrom Cc: horms@verge.net.au, wensong@linux-vs.org, lvs-devel@vger.kernel.org, netdev@vger.kernel.org, netfilter-devel@vger.kernel.org, hans@schillstrom.com Hello, On Tue, 17 Apr 2012, Hans Schillstrom wrote: > I wonder if we are chasing ghosts... > > With proper fault handling I can't even see a case when it (net->ipvs) can be used. > Can you see a case when it could happen? > Still we can set it to NULL on error exit and cleanup as you suggested, that doesn't harm I think. > > A. If you add a netns and it fails the entire ns will be rolled back, > and no access to that ns can occur. > That ns does not exist Agreed > B. If you insert ip_vs.ko when having one or more name spaces and > __ip_vs_init() returns an error the module will be unloaded. > All ready loaded ns will not be affected. Yes, ip_vs_init fails. > C. insmod of ex. ip_vs_ftp only affects loaded name spaces > and if the load of ip_vs_ftp fails it will be unloaded without affecting ip_vs(.ko) > (If ip_vs.ko is not loaded then it has to be loaded first case B...) > > With a "compiled in" ip_vs case B doesn't exist. It is this case that can happen, we can only guess how difficult is to get ENOMEM here. IIRC, we can generate only ENOMEM error on IPVS core load. I assume Simon has such setup and changes code to trigger load error. When I generate ENOMEM on IPVS core init for such case I get ENOENT from register_ip_vs_app when patch 1 and 2 for apps are applied, i.e. net->ipvs is NULL. You can check it with NF_CONNTRACK=y, IP_VS=y and IP_VS_FTP=m. You only need to trigger ENOMEM in __ip_vs_init. > With proper fault handling i.e. all ways returning fault codes to the netns init, > there is no need for checking for "if (!net->ipvs)" or any other action. Probably but one check on load does not hurt much. Regards -- Julian Anastasov <ja@ssi.bg> ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2012-04-17 20:57 UTC | newest] Thread overview: 8+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-04-16 11:39 [PATCH 1/2] IPVS: take care of return value from protocol init_netns Hans Schillstrom 2012-04-16 11:39 ` [PATCH 2/2] IPVS: make failure of netns init more stable Hans Schillstrom 2012-04-16 14:25 ` Julian Anastasov 2012-04-16 17:16 ` Hans Schillstrom 2012-04-16 17:31 ` Julian Anastasov 2012-04-16 17:45 ` Hans Schillstrom 2012-04-17 11:15 ` Hans Schillstrom 2012-04-17 20:57 ` Julian Anastasov
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).