* [rfc] IPVS: convert scheduler management to RCU
@ 2010-08-20 13:33 Simon Horman
  2010-08-20 13:44 ` Changli Gao
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Simon Horman @ 2010-08-20 13:33 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel
  Cc: Stephen Hemminger, Wensong Zhang, Julian Anastasov

Signed-off-by: Simon Horman <horms@verge.net.au>

--- 

I'm still getting my head around RCU, so review would be greatly appreciated.

It occurs to me that this code is not performance critical, so
perhaps simply replacing the rwlock with a spinlock would be better?

Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
===================================================================
--- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:01.000000000 +0900
+++ nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:51.000000000 +0900
@@ -35,7 +35,7 @@
 static LIST_HEAD(ip_vs_schedulers);
 
 /* lock for service table */
-static DEFINE_RWLOCK(__ip_vs_sched_lock);
+static DEFINE_SPINLOCK(ip_vs_sched_mutex);
 
 
 /*
@@ -91,9 +91,9 @@ static struct ip_vs_scheduler *ip_vs_sch
 
 	IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
 
-	read_lock_bh(&__ip_vs_sched_lock);
+	rcu_read_lock_bh();
 
-	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+	list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
 		/*
 		 * Test and get the modules atomically
 		 */
@@ -105,14 +105,14 @@ static struct ip_vs_scheduler *ip_vs_sch
 		}
 		if (strcmp(sched_name, sched->name)==0) {
 			/* HIT */
-			read_unlock_bh(&__ip_vs_sched_lock);
+			rcu_read_unlock_bh();
 			return sched;
 		}
 		if (sched->module)
 			module_put(sched->module);
 	}
 
-	read_unlock_bh(&__ip_vs_sched_lock);
+	rcu_read_unlock_bh();
 	return NULL;
 }
 
@@ -167,10 +167,10 @@ int register_ip_vs_scheduler(struct ip_v
 	/* increase the module use count */
 	ip_vs_use_count_inc();
 
-	write_lock_bh(&__ip_vs_sched_lock);
+	spin_lock_bh(&ip_vs_sched_mutex);
 
 	if (!list_empty(&scheduler->n_list)) {
-		write_unlock_bh(&__ip_vs_sched_lock);
+		spin_unlock_bh(&ip_vs_sched_mutex);
 		ip_vs_use_count_dec();
 		pr_err("%s(): [%s] scheduler already linked\n",
 		       __func__, scheduler->name);
@@ -181,9 +181,9 @@ int register_ip_vs_scheduler(struct ip_v
 	 *  Make sure that the scheduler with this name doesn't exist
 	 *  in the scheduler list.
 	 */
-	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+	list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
 		if (strcmp(scheduler->name, sched->name) == 0) {
-			write_unlock_bh(&__ip_vs_sched_lock);
+			spin_unlock_bh(&ip_vs_sched_mutex);
 			ip_vs_use_count_dec();
 			pr_err("%s(): [%s] scheduler already existed "
 			       "in the system\n", __func__, scheduler->name);
@@ -193,8 +193,8 @@ int register_ip_vs_scheduler(struct ip_v
 	/*
 	 *	Add it into the d-linked scheduler list
 	 */
-	list_add(&scheduler->n_list, &ip_vs_schedulers);
-	write_unlock_bh(&__ip_vs_sched_lock);
+	list_add_rcu(&scheduler->n_list, &ip_vs_schedulers);
+	spin_unlock_bh(&ip_vs_sched_mutex);
 
 	pr_info("[%s] scheduler registered.\n", scheduler->name);
 
@@ -212,9 +212,9 @@ int unregister_ip_vs_scheduler(struct ip
 		return -EINVAL;
 	}
 
-	write_lock_bh(&__ip_vs_sched_lock);
+	spin_lock_bh(&ip_vs_sched_mutex);
 	if (list_empty(&scheduler->n_list)) {
-		write_unlock_bh(&__ip_vs_sched_lock);
+		spin_unlock_bh(&ip_vs_sched_mutex);
 		pr_err("%s(): [%s] scheduler is not in the list. failed\n",
 		       __func__, scheduler->name);
 		return -EINVAL;
@@ -223,8 +223,8 @@ int unregister_ip_vs_scheduler(struct ip
 	/*
 	 *	Remove it from the d-linked scheduler list
 	 */
-	list_del(&scheduler->n_list);
-	write_unlock_bh(&__ip_vs_sched_lock);
+	list_del_rcu(&scheduler->n_list);
+	spin_unlock_bh(&ip_vs_sched_mutex);
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 13:33 [rfc] IPVS: convert scheduler management to RCU Simon Horman
@ 2010-08-20 13:44 ` Changli Gao
  2010-08-20 14:00   ` Simon Horman
  2010-08-20 14:05   ` Eric Dumazet
  2010-08-20 13:59 ` Simon Horman
  2010-08-20 18:03 ` Julian Anastasov
  2 siblings, 2 replies; 16+ messages in thread
From: Changli Gao @ 2010-08-20 13:44 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, netdev, netfilter-devel, Stephen Hemminger,
	Wensong Zhang, Julian Anastasov, Paul E McKenney

On Fri, Aug 20, 2010 at 9:33 PM, Simon Horman <horms@verge.net.au> wrote:
> Signed-off-by: Simon Horman <horms@verge.net.au>
>
> ---
>
> I'm still getting my head around RCU, so review would be greatly appreciated.
>
> It occurs to me that this code is not performance critical, so
> perhaps simply replacing the rwlock with a spinlock would be better?
>
> Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
> ===================================================================
> --- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_sched.c   2010-08-20 22:21:01.000000000 +0900
> +++ nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c        2010-08-20 22:21:51.000000000 +0900
> @@ -35,7 +35,7 @@
>  static LIST_HEAD(ip_vs_schedulers);
>
>  /* lock for service table */
> -static DEFINE_RWLOCK(__ip_vs_sched_lock);
> +static DEFINE_SPINLOCK(ip_vs_sched_mutex);
>
>
>  /*
> @@ -91,9 +91,9 @@ static struct ip_vs_scheduler *ip_vs_sch
>
>        IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
>
> -       read_lock_bh(&__ip_vs_sched_lock);
> +       rcu_read_lock_bh();
>
> -       list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
> +       list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
>                /*
>                 * Test and get the modules atomically
>                 */
> @@ -105,14 +105,14 @@ static struct ip_vs_scheduler *ip_vs_sch
>                }
>                if (strcmp(sched_name, sched->name)==0) {
>                        /* HIT */
> -                       read_unlock_bh(&__ip_vs_sched_lock);
> +                       rcu_read_unlock_bh();
>                        return sched;
>                }
>                if (sched->module)
>                        module_put(sched->module);
>        }
>
> -       read_unlock_bh(&__ip_vs_sched_lock);
> +       rcu_read_unlock_bh();
>        return NULL;
>  }
>
> @@ -167,10 +167,10 @@ int register_ip_vs_scheduler(struct ip_v
>        /* increase the module use count */
>        ip_vs_use_count_inc();
>
> -       write_lock_bh(&__ip_vs_sched_lock);
> +       spin_lock_bh(&ip_vs_sched_mutex);
>
>        if (!list_empty(&scheduler->n_list)) {
> -               write_unlock_bh(&__ip_vs_sched_lock);
> +               spin_unlock_bh(&ip_vs_sched_mutex);
>                ip_vs_use_count_dec();
>                pr_err("%s(): [%s] scheduler already linked\n",
>                       __func__, scheduler->name);
> @@ -181,9 +181,9 @@ int register_ip_vs_scheduler(struct ip_v
>         *  Make sure that the scheduler with this name doesn't exist
>         *  in the scheduler list.
>         */
> -       list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
> +       list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
>                if (strcmp(scheduler->name, sched->name) == 0) {
> -                       write_unlock_bh(&__ip_vs_sched_lock);
> +                       spin_unlock_bh(&ip_vs_sched_mutex);
>                        ip_vs_use_count_dec();
>                        pr_err("%s(): [%s] scheduler already existed "
>                               "in the system\n", __func__, scheduler->name);
> @@ -193,8 +193,8 @@ int register_ip_vs_scheduler(struct ip_v
>        /*
>         *      Add it into the d-linked scheduler list
>         */
> -       list_add(&scheduler->n_list, &ip_vs_schedulers);
> -       write_unlock_bh(&__ip_vs_sched_lock);
> +       list_add_rcu(&scheduler->n_list, &ip_vs_schedulers);
> +       spin_unlock_bh(&ip_vs_sched_mutex);
>
>        pr_info("[%s] scheduler registered.\n", scheduler->name);
>
> @@ -212,9 +212,9 @@ int unregister_ip_vs_scheduler(struct ip
>                return -EINVAL;
>        }
>
> -       write_lock_bh(&__ip_vs_sched_lock);
> +       spin_lock_bh(&ip_vs_sched_mutex);
>        if (list_empty(&scheduler->n_list)) {
> -               write_unlock_bh(&__ip_vs_sched_lock);
> +               spin_unlock_bh(&ip_vs_sched_mutex);
>                pr_err("%s(): [%s] scheduler is not in the list. failed\n",
>                       __func__, scheduler->name);
>                return -EINVAL;
> @@ -223,8 +223,8 @@ int unregister_ip_vs_scheduler(struct ip
>        /*
>         *      Remove it from the d-linked scheduler list
>         */
> -       list_del(&scheduler->n_list);
> -       write_unlock_bh(&__ip_vs_sched_lock);
> +       list_del_rcu(&scheduler->n_list);
> +       spin_unlock_bh(&ip_vs_sched_mutex);

Need a rcu_barrier_bh().
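
Something like this, presumably (a sketch only; rcu_barrier_bh() waits for
all pending call_rcu_bh() callbacks to complete, which is what matters
before a module owning such callbacks can be unloaded):

	list_del_rcu(&scheduler->n_list);
	spin_unlock_bh(&ip_vs_sched_mutex);

	/* flush pending call_rcu_bh() callbacks before letting the
	 * caller unload the module that owns 'scheduler' */
	rcu_barrier_bh();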

>
>        /* decrease the module use count */
>        ip_vs_use_count_dec();



-- 
Regards,
Changli Gao(xiaosuo@gmail.com)


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 13:33 [rfc] IPVS: convert scheduler management to RCU Simon Horman
  2010-08-20 13:44 ` Changli Gao
@ 2010-08-20 13:59 ` Simon Horman
  2010-08-20 19:29   ` Paul E. McKenney
  2010-08-20 18:03 ` Julian Anastasov
  2 siblings, 1 reply; 16+ messages in thread
From: Simon Horman @ 2010-08-20 13:59 UTC (permalink / raw)
  To: lvs-devel, netdev, netfilter-devel
  Cc: Stephen Hemminger, Wensong Zhang, Julian Anastasov

On Fri, Aug 20, 2010 at 10:33:21PM +0900, Simon Horman wrote:
> Signed-off-by: Simon Horman <horms@verge.net.au>
> 
> --- 
> 
> I'm still getting my head around RCU, so review would be greatly appreciated.
> 
> It occurs to me that this code is not performance critical, so
> perhaps simply replacing the rwlock with a spinlock would be better?
> 
> Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
> ===================================================================
> --- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:01.000000000 +0900
> +++ nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:51.000000000 +0900
> @@ -35,7 +35,7 @@
>  static LIST_HEAD(ip_vs_schedulers);
>  
>  /* lock for service table */
> -static DEFINE_RWLOCK(__ip_vs_sched_lock);
> +static DEFINE_SPINLOCK(ip_vs_sched_mutex);
>  
>  
>  /*
> @@ -91,9 +91,9 @@ static struct ip_vs_scheduler *ip_vs_sch
>  
>  	IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
>  
> -	read_lock_bh(&__ip_vs_sched_lock);
> +	rcu_read_lock_bh();
>  
> -	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
> +	list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
>  		/*
>  		 * Test and get the modules atomically
>  		 */
> @@ -105,14 +105,14 @@ static struct ip_vs_scheduler *ip_vs_sch
>  		}
>  		if (strcmp(sched_name, sched->name)==0) {
>  			/* HIT */
> -			read_unlock_bh(&__ip_vs_sched_lock);
> +			rcu_read_unlock_bh();
>  			return sched;
>  		}
>  		if (sched->module)
>  			module_put(sched->module);
>  	}
>  
> -	read_unlock_bh(&__ip_vs_sched_lock);
> +	rcu_read_unlock_bh();
>  	return NULL;
>  }
>  
> @@ -167,10 +167,10 @@ int register_ip_vs_scheduler(struct ip_v
>  	/* increase the module use count */
>  	ip_vs_use_count_inc();
>  
> -	write_lock_bh(&__ip_vs_sched_lock);
> +	spin_lock_bh(&ip_vs_sched_mutex);
>  
>  	if (!list_empty(&scheduler->n_list)) {
> -		write_unlock_bh(&__ip_vs_sched_lock);
> +		spin_unlock_bh(&ip_vs_sched_mutex);
>  		ip_vs_use_count_dec();
>  		pr_err("%s(): [%s] scheduler already linked\n",
>  		       __func__, scheduler->name);
> @@ -181,9 +181,9 @@ int register_ip_vs_scheduler(struct ip_v
>  	 *  Make sure that the scheduler with this name doesn't exist
>  	 *  in the scheduler list.
>  	 */
> -	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
> +	list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
>  		if (strcmp(scheduler->name, sched->name) == 0) {
> -			write_unlock_bh(&__ip_vs_sched_lock);
> +			spin_unlock_bh(&ip_vs_sched_mutex);
>  			ip_vs_use_count_dec();
>  			pr_err("%s(): [%s] scheduler already existed "
>  			       "in the system\n", __func__, scheduler->name);
> @@ -193,8 +193,8 @@ int register_ip_vs_scheduler(struct ip_v
>  	/*
>  	 *	Add it into the d-linked scheduler list
>  	 */
> -	list_add(&scheduler->n_list, &ip_vs_schedulers);
> -	write_unlock_bh(&__ip_vs_sched_lock);
> +	list_add_rcu(&scheduler->n_list, &ip_vs_schedulers);
> +	spin_unlock_bh(&ip_vs_sched_mutex);
>  
>  	pr_info("[%s] scheduler registered.\n", scheduler->name);
>  
> @@ -212,9 +212,9 @@ int unregister_ip_vs_scheduler(struct ip
>  		return -EINVAL;
>  	}
>  
> -	write_lock_bh(&__ip_vs_sched_lock);
> +	spin_lock_bh(&ip_vs_sched_mutex);
>  	if (list_empty(&scheduler->n_list)) {
> -		write_unlock_bh(&__ip_vs_sched_lock);
> +		spin_unlock_bh(&ip_vs_sched_mutex);
>  		pr_err("%s(): [%s] scheduler is not in the list. failed\n",
>  		       __func__, scheduler->name);
>  		return -EINVAL;
> @@ -223,8 +223,8 @@ int unregister_ip_vs_scheduler(struct ip
>  	/*
>  	 *	Remove it from the d-linked scheduler list
>  	 */
> -	list_del(&scheduler->n_list);
> -	write_unlock_bh(&__ip_vs_sched_lock);
> +	list_del_rcu(&scheduler->n_list);
> +	spin_unlock_bh(&ip_vs_sched_mutex);

On further reading, I believe that I need a synchronize_rcu() here.

>  
>  	/* decrease the module use count */
>  	ip_vs_use_count_dec();


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 13:44 ` Changli Gao
@ 2010-08-20 14:00   ` Simon Horman
  2010-08-20 14:05   ` Eric Dumazet
  1 sibling, 0 replies; 16+ messages in thread
From: Simon Horman @ 2010-08-20 14:00 UTC (permalink / raw)
  To: Changli Gao
  Cc: lvs-devel, netdev, netfilter-devel, Stephen Hemminger,
	Wensong Zhang, Julian Anastasov, Paul E McKenney

On Fri, Aug 20, 2010 at 09:44:08PM +0800, Changli Gao wrote:
> On Fri, Aug 20, 2010 at 9:33 PM, Simon Horman <horms@verge.net.au> wrote:
> > Signed-off-by: Simon Horman <horms@verge.net.au>
> >
> > ---
> >
> > I'm still getting my head around RCU, so review would be greatly appreciated.
> >
> > It occurs to me that this code is not performance critical, so
> > perhaps simply replacing the rwlock with a spinlock would be better?
> >
> > Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
> > ===================================================================
> > --- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_sched.c   2010-08-20 22:21:01.000000000 +0900
> > +++ nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c        2010-08-20 22:21:51.000000000 +0900
> > @@ -35,7 +35,7 @@
> >  static LIST_HEAD(ip_vs_schedulers);
> >
> >  /* lock for service table */
> > -static DEFINE_RWLOCK(__ip_vs_sched_lock);
> > +static DEFINE_SPINLOCK(ip_vs_sched_mutex);
> >
> >
> >  /*
> > @@ -91,9 +91,9 @@ static struct ip_vs_scheduler *ip_vs_sch
> >
> >        IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
> >
> > -       read_lock_bh(&__ip_vs_sched_lock);
> > +       rcu_read_lock_bh();
> >
> > -       list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
> > +       list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
> >                /*
> >                 * Test and get the modules atomically
> >                 */
> > @@ -105,14 +105,14 @@ static struct ip_vs_scheduler *ip_vs_sch
> >                }
> >                if (strcmp(sched_name, sched->name)==0) {
> >                        /* HIT */
> > -                       read_unlock_bh(&__ip_vs_sched_lock);
> > +                       rcu_read_unlock_bh();
> >                        return sched;
> >                }
> >                if (sched->module)
> >                        module_put(sched->module);
> >        }
> >
> > -       read_unlock_bh(&__ip_vs_sched_lock);
> > +       rcu_read_unlock_bh();
> >        return NULL;
> >  }
> >
> > @@ -167,10 +167,10 @@ int register_ip_vs_scheduler(struct ip_v
> >        /* increase the module use count */
> >        ip_vs_use_count_inc();
> >
> > -       write_lock_bh(&__ip_vs_sched_lock);
> > +       spin_lock_bh(&ip_vs_sched_mutex);
> >
> >        if (!list_empty(&scheduler->n_list)) {
> > -               write_unlock_bh(&__ip_vs_sched_lock);
> > +               spin_unlock_bh(&ip_vs_sched_mutex);
> >                ip_vs_use_count_dec();
> >                pr_err("%s(): [%s] scheduler already linked\n",
> >                       __func__, scheduler->name);
> > @@ -181,9 +181,9 @@ int register_ip_vs_scheduler(struct ip_v
> >         *  Make sure that the scheduler with this name doesn't exist
> >         *  in the scheduler list.
> >         */
> > -       list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
> > +       list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
> >                if (strcmp(scheduler->name, sched->name) == 0) {
> > -                       write_unlock_bh(&__ip_vs_sched_lock);
> > +                       spin_unlock_bh(&ip_vs_sched_mutex);
> >                        ip_vs_use_count_dec();
> >                        pr_err("%s(): [%s] scheduler already existed "
> >                               "in the system\n", __func__, scheduler->name);
> > @@ -193,8 +193,8 @@ int register_ip_vs_scheduler(struct ip_v
> >        /*
> >         *      Add it into the d-linked scheduler list
> >         */
> > -       list_add(&scheduler->n_list, &ip_vs_schedulers);
> > -       write_unlock_bh(&__ip_vs_sched_lock);
> > +       list_add_rcu(&scheduler->n_list, &ip_vs_schedulers);
> > +       spin_unlock_bh(&ip_vs_sched_mutex);
> >
> >        pr_info("[%s] scheduler registered.\n", scheduler->name);
> >
> > @@ -212,9 +212,9 @@ int unregister_ip_vs_scheduler(struct ip
> >                return -EINVAL;
> >        }
> >
> > -       write_lock_bh(&__ip_vs_sched_lock);
> > +       spin_lock_bh(&ip_vs_sched_mutex);
> >        if (list_empty(&scheduler->n_list)) {
> > -               write_unlock_bh(&__ip_vs_sched_lock);
> > +               spin_unlock_bh(&ip_vs_sched_mutex);
> >                pr_err("%s(): [%s] scheduler is not in the list. failed\n",
> >                       __func__, scheduler->name);
> >                return -EINVAL;
> > @@ -223,8 +223,8 @@ int unregister_ip_vs_scheduler(struct ip
> >        /*
> >         *      Remove it from the d-linked scheduler list
> >         */
> > -       list_del(&scheduler->n_list);
> > -       write_unlock_bh(&__ip_vs_sched_lock);
> > +       list_del_rcu(&scheduler->n_list);
> > +       spin_unlock_bh(&ip_vs_sched_mutex);
> 
> Need a rcu_barrier_bh().

Thanks.

> 
> >
> >        /* decrease the module use count */
> >        ip_vs_use_count_dec();
> 
> 
> 
> -- 
> Regards,
> Changli Gao(xiaosuo@gmail.com)


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 13:44 ` Changli Gao
  2010-08-20 14:00   ` Simon Horman
@ 2010-08-20 14:05   ` Eric Dumazet
  2010-08-20 14:16     ` yao zhao
  2010-08-20 14:31     ` Simon Horman
  1 sibling, 2 replies; 16+ messages in thread
From: Eric Dumazet @ 2010-08-20 14:05 UTC (permalink / raw)
  To: Changli Gao
  Cc: Simon Horman, lvs-devel, netdev, netfilter-devel,
	Stephen Hemminger, Wensong Zhang, Julian Anastasov,
	Paul E McKenney

On Friday 20 August 2010 at 21:44 +0800, Changli Gao wrote:
> On Fri, Aug 20, 2010 at 9:33 PM, Simon Horman <horms@verge.net.au> wrote:
> > Signed-off-by: Simon Horman <horms@verge.net.au>
> >
> > ---
> >
> > I'm still getting my head around RCU, so review would be greatly appreciated.
> >
> > It occurs to me that this code is not performance critical, so
> > perhaps simply replacing the rwlock with a spinlock would be better?
> >
> > Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c


> > -       write_unlock_bh(&__ip_vs_sched_lock);
> > +       list_del_rcu(&scheduler->n_list);
> > +       spin_unlock_bh(&ip_vs_sched_mutex);
> 
> Need a rcu_barrier_bh().
> 
> >
> >        /* decrease the module use count */
> >        ip_vs_use_count_dec();


Quite frankly, if this is not performance critical, just use the
spinlock (and don't use 'mutex' in its name ;) )

Using RCU here will force at least one RCU grace period at dismantle
time...
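
That is, keep the plain-list lookup and just swap the lock, roughly (a
sketch of the non-RCU variant; the lookup function's name is truncated
to "ip_vs_sch" in the hunk headers above, so the full name below is
assumed):

	static DEFINE_SPINLOCK(ip_vs_sched_lock);

	static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
	{
		struct ip_vs_scheduler *sched;

		spin_lock_bh(&ip_vs_sched_lock);
		list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
			/* test and get the module atomically */
			if (sched->module && !try_module_get(sched->module))
				continue;
			if (strcmp(sched_name, sched->name) == 0) {
				/* HIT */
				spin_unlock_bh(&ip_vs_sched_lock);
				return sched;
			}
			if (sched->module)
				module_put(sched->module);
		}
		spin_unlock_bh(&ip_vs_sched_lock);
		return NULL;
	}

With a plain spinlock both the readers and the register/unregister
paths are serialized, so no grace period is needed at unregister time.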





* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 14:05   ` Eric Dumazet
@ 2010-08-20 14:16     ` yao zhao
  2010-08-20 14:32       ` Eric Dumazet
  2010-08-20 14:33       ` Simon Horman
  2010-08-20 14:31     ` Simon Horman
  1 sibling, 2 replies; 16+ messages in thread
From: yao zhao @ 2010-08-20 14:16 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Changli Gao, Simon Horman, lvs-devel, netdev, netfilter-devel,
	Stephen Hemminger, Wensong Zhang, Julian Anastasov,
	Paul E McKenney

On Fri, Aug 20, 2010 at 10:05 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Friday 20 August 2010 at 21:44 +0800, Changli Gao wrote:
>> On Fri, Aug 20, 2010 at 9:33 PM, Simon Horman <horms@verge.net.au> wrote:
>> > Signed-off-by: Simon Horman <horms@verge.net.au>
>> >
>> > ---
>> >
>> > I'm still getting my head around RCU, so review would be greatly appreciated.
>> >
>> > It occurs to me that this code is not performance critical, so
>> > perhaps simply replacing the rwlock with a spinlock would be better?
>> >
>> > Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
>
>
>> > -       write_unlock_bh(&__ip_vs_sched_lock);
>> > +       list_del_rcu(&scheduler->n_list);
>> > +       spin_unlock_bh(&ip_vs_sched_mutex);
>>
>> Need a rcu_barrier_bh().
>>
>> >
>> >        /* decrease the module use count */
>> >        ip_vs_use_count_dec();
>
>
> Quite frankly, if this is not performance critical, just use the
> spinlock (and don't use 'mutex' in its name ;) )
>
if it is not performance critical, you should use the
read_lock/write_lock; it should make the readers happier than a
spinlock. The name "mutex" is a little bit confusing.
synchronize_rcu() is not necessary when you only need to delete from a
list, as the deletion is atomic.

> Using RCU here will force at least one RCU grace period at dismantle
> time...
>
>
>

yao


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 14:05   ` Eric Dumazet
  2010-08-20 14:16     ` yao zhao
@ 2010-08-20 14:31     ` Simon Horman
  1 sibling, 0 replies; 16+ messages in thread
From: Simon Horman @ 2010-08-20 14:31 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Changli Gao, lvs-devel, netdev, netfilter-devel,
	Stephen Hemminger, Wensong Zhang, Julian Anastasov,
	Paul E McKenney

On Fri, Aug 20, 2010 at 04:05:50PM +0200, Eric Dumazet wrote:
> On Friday 20 August 2010 at 21:44 +0800, Changli Gao wrote:
> > On Fri, Aug 20, 2010 at 9:33 PM, Simon Horman <horms@verge.net.au> wrote:
> > > Signed-off-by: Simon Horman <horms@verge.net.au>
> > >
> > > ---
> > >
> > > I'm still getting my head around RCU, so review would be greatly appreciated.
> > >
> > > It occurs to me that this code is not performance critical, so
> > > perhaps simply replacing the rwlock with a spinlock would be better?
> > >
> > > Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
> 
> 
> > > -       write_unlock_bh(&__ip_vs_sched_lock);
> > > +       list_del_rcu(&scheduler->n_list);
> > > +       spin_unlock_bh(&ip_vs_sched_mutex);
> > 
> > Need a rcu_barrier_bh().
> > 
> > >
> > >        /* decrease the module use count */
> > >        ip_vs_use_count_dec();
> 
> 
> Quite frankly, if this is not performance critical, just use the
> spinlock (and don't use 'mutex' in its name ;) )

Will do.

> Using RCU here will force at least one RCU grace period at dismantle
> time...
> 
> 


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 14:16     ` yao zhao
@ 2010-08-20 14:32       ` Eric Dumazet
  2010-08-20 15:04         ` yao zhao
  2010-08-20 14:33       ` Simon Horman
  1 sibling, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2010-08-20 14:32 UTC (permalink / raw)
  To: yao zhao
  Cc: Changli Gao, Simon Horman, lvs-devel, netdev, netfilter-devel,
	Stephen Hemminger, Wensong Zhang, Julian Anastasov,
	Paul E McKenney

On Friday 20 August 2010 at 10:16 -0400, yao zhao wrote:

> if it is not performance critical, you should use the
> read_lock/write_lock; it should make the readers happier than a
> spinlock. The name "mutex" is a little bit confusing.

Yes, I mentioned the 'mutex' name oddity.

Point is:

We want to remove read/write locks. They don't fit the bill.

If performance critical, with lots of readers -> RCU (a lot faster).
If not, or too many writers versus readers -> spinlock (a bit faster).



> synchronize_rcu() is not necessary when you only need to delete from a
> list, as the deletion is atomic.
> 

That's a rather strange and completely wrong claim. A big part of RCU's job
is to have appropriate work done on deletes. Inserts are easier (they only
need an smp_wmb()).

Take a look at Documentation/RCU/* before saying such things ;)

synchronize_rcu() is not only needed to protect this kind of code,
you actually need something stronger.
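
The canonical delete sequence those documents describe is roughly:

	list_del_rcu(&p->list);
	synchronize_rcu();	/* wait for all pre-existing readers */
	kfree(p);		/* only now is the free safe */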




* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 14:16     ` yao zhao
  2010-08-20 14:32       ` Eric Dumazet
@ 2010-08-20 14:33       ` Simon Horman
  1 sibling, 0 replies; 16+ messages in thread
From: Simon Horman @ 2010-08-20 14:33 UTC (permalink / raw)
  To: yao zhao
  Cc: Eric Dumazet, Changli Gao, lvs-devel, netdev, netfilter-devel,
	Stephen Hemminger, Wensong Zhang, Julian Anastasov,
	Paul E McKenney

On Fri, Aug 20, 2010 at 10:16:23AM -0400, yao zhao wrote:
> On Fri, Aug 20, 2010 at 10:05 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Friday 20 August 2010 at 21:44 +0800, Changli Gao wrote:
> >> On Fri, Aug 20, 2010 at 9:33 PM, Simon Horman <horms@verge.net.au> wrote:
> >> > Signed-off-by: Simon Horman <horms@verge.net.au>
> >> >
> >> > ---
> >> >
> >> > I'm still getting my head around RCU, so review would be greatly appreciated.
> >> >
> >> > It occurs to me that this code is not performance critical, so
> >> > perhaps simply replacing the rwlock with a spinlock would be better?
> >> >
> >> > Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
> >
> >
> >> > -       write_unlock_bh(&__ip_vs_sched_lock);
> >> > +       list_del_rcu(&scheduler->n_list);
> >> > +       spin_unlock_bh(&ip_vs_sched_mutex);
> >>
> >> Need a rcu_barrier_bh().
> >>
> >> >
> >> >        /* decrease the module use count */
> >> >        ip_vs_use_count_dec();
> >
> >
> > Quite frankly, if this is not performance critical, just use the
> > spinlock (and don't use 'mutex' in its name ;) )
> >
> if it is not performance critical, you should use the
> read_lock/write_lock; it should make the readers happier than a
> spinlock.

The whole point of the exercise is to stop using read_lock/write_lock
because they are generally slower than a spinlock.

> The name "mutex" is a little bit confusing.
> synchronize_rcu() is not necessary when you only need to delete from a
> list, as the deletion is atomic.
>
> > Using RCU here will force at least one RCU grace period at dismantle
> > time...
> >
> >
> >
> 
> yao


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 14:32       ` Eric Dumazet
@ 2010-08-20 15:04         ` yao zhao
  2010-08-20 15:32           ` Eric Dumazet
  0 siblings, 1 reply; 16+ messages in thread
From: yao zhao @ 2010-08-20 15:04 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Changli Gao, Simon Horman, lvs-devel, netdev, netfilter-devel,
	Stephen Hemminger, Wensong Zhang, Julian Anastasov,
	Paul E McKenney

On Fri, Aug 20, 2010 at 10:32 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Friday 20 August 2010 at 10:16 -0400, yao zhao wrote:
>
>> if it is not performance critical, you should use the
>> read_lock/write_lock; it should make the readers happier than a
>> spinlock. The name "mutex" is a little bit confusing.
>
> Yes, I mentioned the 'mutex' name oddity.
>
> Point is:
>
> We want to remove read/write locks. They don't fit the bill.
>
> If performance critical, with lots of readers -> RCU (a lot faster).
> If not, or too many writers versus readers -> spinlock (a bit faster).
>
>
With more writers than readers a spinlock will of course be better; that is
what read/write locks versus spinlocks are for.
But the question here is whether register_ip_vs_scheduler() and
unregister_ip_vs_scheduler() are called more frequently than the readers.
If not, read_lock will definitely be better than spin_lock, although still worse than RCU.
>
>> synchronize_rcu() is not necessary when you only need to delete from a
>> list, as the deletion is atomic.
>>
>
> That's a rather strange and completely wrong claim. A big part of RCU's job
> is to have appropriate work done on deletes. Inserts are easier (they only
> need an smp_wmb()).
>
> Take a look at Documentation/RCU/* before saying such things ;)
>
> synchronize_rcu() is not only needed to protect this kind of code,
> you actually need something stronger.
>
>
>
The code here is deleting a global object from the list, am I right? I didn't
see any case where it is freed.
What more are you going to do? Free it? write_unlock_bh() should provide the memory barrier.

yao


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 15:04         ` yao zhao
@ 2010-08-20 15:32           ` Eric Dumazet
  2010-08-20 17:54             ` yao zhao
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2010-08-20 15:32 UTC (permalink / raw)
  To: yao zhao
  Cc: Changli Gao, Simon Horman, lvs-devel, netdev, netfilter-devel,
	Stephen Hemminger, Wensong Zhang, Julian Anastasov,
	Paul E McKenney

On Friday 20 August 2010 at 11:04 -0400, yao zhao wrote:

> The code here is deleting a global object from the list, am I right? I didn't
> see any case where it is freed.
> What more are you going to do? Free it? write_unlock_bh() should provide the memory barrier.


If you don't wait _after_ the delete from the list and before the following
actions (kfree() without a call_rcu(), or module unload, or whatever), a reader
might access your data/code and crash the box.

spin_unlock_bh() won't help you at all, since only writers are frozen by
the lock (readers only hold the RCU read lock).

Documentation/RCU/whatisRCU.txt line 705

Documentation/RCU/checklist.txt  15)





* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 15:32           ` Eric Dumazet
@ 2010-08-20 17:54             ` yao zhao
  0 siblings, 0 replies; 16+ messages in thread
From: yao zhao @ 2010-08-20 17:54 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Changli Gao, Simon Horman, lvs-devel, netdev, netfilter-devel,
	Stephen Hemminger, Wensong Zhang, Julian Anastasov,
	Paul E McKenney

On Fri, Aug 20, 2010 at 11:32 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Friday 20 August 2010 at 11:04 -0400, yao zhao wrote:
>
>> The code here is deleting a global object from the list, am I right? I didn't
>> see any case where it is freed.
>> What more are you going to do? Free it? write_unlock_bh() should provide the memory barrier.
>
>
> If you don't wait _after_ the delete from the list and before the following
> actions (kfree() without a call_rcu(), or module unload, or whatever), a reader
> might access your data/code and crash the box.
>
> spin_unlock_bh() won't help you at all, since only writers are frozen by
> the lock (readers only hold the RCU read lock).
>
> Documentation/RCU/whatisRCU.txt line 705
>
> Documentation/RCU/checklist.txt  15)
>
>
>
>
I read the code again: that global is in a module, so you are right.
If that global were not in a module you wouldn't need it at all, since
the global holds only function pointers and a name... which never change.

yao


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 13:33 [rfc] IPVS: convert scheduler management to RCU Simon Horman
  2010-08-20 13:44 ` Changli Gao
  2010-08-20 13:59 ` Simon Horman
@ 2010-08-20 18:03 ` Julian Anastasov
  2010-08-21  3:30   ` Simon Horman
  2 siblings, 1 reply; 16+ messages in thread
From: Julian Anastasov @ 2010-08-20 18:03 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, netdev, netfilter-devel, Stephen Hemminger,
	Wensong Zhang


	Hello,

On Fri, 20 Aug 2010, Simon Horman wrote:

> Signed-off-by: Simon Horman <horms@verge.net.au>
> 
> --- 
> 
> I'm still getting my head around RCU, so review would be greatly appreciated.
> 
> It occurs to me that this code is not performance critical, so
> perhaps simply replacing the rwlock with a spinlock would be better?

	This specific code does not need RCU conversion, see below

> Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
> ===================================================================
> --- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:01.000000000 +0900
> +++ nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:51.000000000 +0900
> @@ -35,7 +35,7 @@
>  static LIST_HEAD(ip_vs_schedulers);
>  
>  /* lock for service table */
> -static DEFINE_RWLOCK(__ip_vs_sched_lock);
> +static DEFINE_SPINLOCK(ip_vs_sched_mutex);

	Here is what I got as a list of locking points:

__ip_vs_conntbl_lock_array:
	- can benefit from RCU, main benefits come from here

- ip_vs_conn_unhash() followed by ip_vs_conn_hash() is tricky with RCU,
	needs more thinking, eg. when cport is changed

cp->lock, cp->refcnt:
	- not a problem

tcp_app_lock, udp_app_lock, sctp_app_lock:
	- can benefit from RCU (once per connection)

svc->sched_lock:
	- only 1 read_lock, mostly writers that need exclusive access
	- so, not suitable for RCU, can be switched to spin_lock for speed

__ip_vs_sched_lock:
	- not called by packet handlers, no need for RCU
	- used only by one ip_vs_ctl user (configuration) and the
	scheduler modules
	- can remain RWLOCK, no changes in locking are needed

__ip_vs_svc_lock:
	- spin_lock, use RCU
	- restrictions for schedulers with .update_service method
	because svc->sched_lock is write locked, see below

__ip_vs_rs_lock:
	- spin_lock, use RCU

Schedulers:
	- every .schedule method has its own locking, two examples:
		- write_lock: to protect the scheduler state (can be
		changed to spin_lock), see WRR. Difficult for RCU.
		- no lock: relies on IP_VS_WAIT_WHILE, no state
		is protected explicitly, fast like RCU, see WLC

Scheduler state, eg. mark->cl:
	- careful RCU assignment, may be all .update_service methods
	should use copy-on-update (WRR). OTOH, ip_vs_wlc_schedule (WLC)
	has no locks at all, thanks to the IP_VS_WAIT_WHILE, so
	it is as fast as RCU.

Statistics:
dest->stats.lock, svc->stats.lock, ip_vs_stats.lock:
	- called for every packet, BAD for SMP, see ip_vs_in_stats(),
	ip_vs_out_stats(), ip_vs_conn_stats()

curr_sb_lock:
	- called for every packet depending on conn state
	- No benefits from RCU, should be spin_lock

	To summarize:

- the main problem remains stats:
	dest->stats.lock, svc->stats.lock, ip_vs_stats.lock

- RCU benefits when a connection processes many packets, eg.
	for TCP, SCTP, not so much for UDP. No gains for the 1st
	packet in a connection.

- svc: no benefits from RCU, some schedulers protect state and
need exclusive access, others have no state (and they do not use
locks even now)
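
On the stats locks, the usual way off the per-packet path is per-CPU
counters that are summed only when the stats are read. A hypothetical
sketch (ip_vs_cpu_stats and ip_vs_count_in() are made-up names, not
anything in the patch above):

	struct ip_vs_cpu_stats {
		u64	inpkts;
		u64	inbytes;
	};
	static DEFINE_PER_CPU(struct ip_vs_cpu_stats, ip_vs_pcpu_stats);

	static void ip_vs_count_in(unsigned int len)
	{
		struct ip_vs_cpu_stats *s = &get_cpu_var(ip_vs_pcpu_stats);

		/* no shared lock, no cache-line bouncing; a reader
		 * summing all CPUs tolerates slight skew */
		s->inpkts++;
		s->inbytes += len;
		put_cpu_var(ip_vs_pcpu_stats);
	}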

Regards

--
Julian Anastasov <ja@ssi.bg>


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 13:59 ` Simon Horman
@ 2010-08-20 19:29   ` Paul E. McKenney
  2010-08-21  3:28     ` Simon Horman
  0 siblings, 1 reply; 16+ messages in thread
From: Paul E. McKenney @ 2010-08-20 19:29 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, netdev, netfilter-devel, Stephen Hemminger,
	Wensong Zhang, Julian Anastasov

On Fri, Aug 20, 2010 at 10:59:19PM +0900, Simon Horman wrote:
> On Fri, Aug 20, 2010 at 10:33:21PM +0900, Simon Horman wrote:
> > Signed-off-by: Simon Horman <horms@verge.net.au>
> > 
> > --- 
> > 
> > I'm still getting my head around RCU, so review would be greatly appreciated.
> > 
> > It occurs to me that this code is not performance critical, so
> > perhaps simply replacing the rwlock with a spinlock would be better?
> > 
> > Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
> > ===================================================================
> > --- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:01.000000000 +0900
> > +++ nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:51.000000000 +0900
> > @@ -35,7 +35,7 @@
> >  static LIST_HEAD(ip_vs_schedulers);
> >  
> >  /* lock for service table */
> > -static DEFINE_RWLOCK(__ip_vs_sched_lock);
> > +static DEFINE_SPINLOCK(ip_vs_sched_mutex);
> >  
> >  
> >  /*
> > @@ -91,9 +91,9 @@ static struct ip_vs_scheduler *ip_vs_sch
> >  
> >  	IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
> >  
> > -	read_lock_bh(&__ip_vs_sched_lock);
> > +	rcu_read_lock_bh();
> >  
> > -	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
> > +	list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
> >  		/*
> >  		 * Test and get the modules atomically
> >  		 */
> > @@ -105,14 +105,14 @@ static struct ip_vs_scheduler *ip_vs_sch
> >  		}
> >  		if (strcmp(sched_name, sched->name)==0) {
> >  			/* HIT */
> > -			read_unlock_bh(&__ip_vs_sched_lock);
> > +			rcu_read_unlock_bh();
> >  			return sched;
> >  		}
> >  		if (sched->module)
> >  			module_put(sched->module);
> >  	}
> >  
> > -	read_unlock_bh(&__ip_vs_sched_lock);
> > +	rcu_read_unlock_bh();
> >  	return NULL;
> >  }
> >  
> > @@ -167,10 +167,10 @@ int register_ip_vs_scheduler(struct ip_v
> >  	/* increase the module use count */
> >  	ip_vs_use_count_inc();
> >  
> > -	write_lock_bh(&__ip_vs_sched_lock);
> > +	spin_lock_bh(&ip_vs_sched_mutex);
> >  
> >  	if (!list_empty(&scheduler->n_list)) {
> > -		write_unlock_bh(&__ip_vs_sched_lock);
> > +		spin_unlock_bh(&ip_vs_sched_mutex);
> >  		ip_vs_use_count_dec();
> >  		pr_err("%s(): [%s] scheduler already linked\n",
> >  		       __func__, scheduler->name);
> > @@ -181,9 +181,9 @@ int register_ip_vs_scheduler(struct ip_v
> >  	 *  Make sure that the scheduler with this name doesn't exist
> >  	 *  in the scheduler list.
> >  	 */
> > -	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
> > +	list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list) {
> >  		if (strcmp(scheduler->name, sched->name) == 0) {
> > -			write_unlock_bh(&__ip_vs_sched_lock);
> > +			spin_unlock_bh(&ip_vs_sched_mutex);
> >  			ip_vs_use_count_dec();
> >  			pr_err("%s(): [%s] scheduler already existed "
> >  			       "in the system\n", __func__, scheduler->name);
> > @@ -193,8 +193,8 @@ int register_ip_vs_scheduler(struct ip_v
> >  	/*
> >  	 *	Add it into the d-linked scheduler list
> >  	 */
> > -	list_add(&scheduler->n_list, &ip_vs_schedulers);
> > -	write_unlock_bh(&__ip_vs_sched_lock);
> > +	list_add_rcu(&scheduler->n_list, &ip_vs_schedulers);
> > +	spin_unlock_bh(&ip_vs_sched_mutex);
> >  
> >  	pr_info("[%s] scheduler registered.\n", scheduler->name);
> >  
> > @@ -212,9 +212,9 @@ int unregister_ip_vs_scheduler(struct ip
> >  		return -EINVAL;
> >  	}
> >  
> > -	write_lock_bh(&__ip_vs_sched_lock);
> > +	spin_lock_bh(&ip_vs_sched_mutex);
> >  	if (list_empty(&scheduler->n_list)) {
> > -		write_unlock_bh(&__ip_vs_sched_lock);
> > +		spin_unlock_bh(&ip_vs_sched_mutex);
> >  		pr_err("%s(): [%s] scheduler is not in the list. failed\n",
> >  		       __func__, scheduler->name);
> >  		return -EINVAL;
> > @@ -223,8 +223,8 @@ int unregister_ip_vs_scheduler(struct ip
> >  	/*
> >  	 *	Remove it from the d-linked scheduler list
> >  	 */
> > -	list_del(&scheduler->n_list);
> > -	write_unlock_bh(&__ip_vs_sched_lock);
> > +	list_del_rcu(&scheduler->n_list);
> > +	spin_unlock_bh(&ip_vs_sched_mutex);
> 
> On further reading, I believe that I need a synchronize_rcu() here.

Good catch!

However, you actually need synchronize_rcu_bh() to match your
rcu_read_lock_bh() calls.  Also, given Julian's comment, you probably
need something to show that this conversion is a real improvement.
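
In other words (a sketch, reusing names from the patch), both sides
have to use the same flavor:

	/* read side, _bh flavor: */
	rcu_read_lock_bh();
	list_for_each_entry_rcu(sched, &ip_vs_schedulers, n_list)
		if (strcmp(sched->name, sched_name) == 0)
			break;
	rcu_read_unlock_bh();

	/* the update side must then wait with the matching primitive: */
	list_del_rcu(&scheduler->n_list);
	synchronize_rcu_bh();	/* not plain synchronize_rcu() */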

							Thanx, Paul

> >  	/* decrease the module use count */
> >  	ip_vs_use_count_dec();


* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 19:29   ` Paul E. McKenney
@ 2010-08-21  3:28     ` Simon Horman
  0 siblings, 0 replies; 16+ messages in thread
From: Simon Horman @ 2010-08-21  3:28 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: lvs-devel, netdev, netfilter-devel, Stephen Hemminger,
	Wensong Zhang, Julian Anastasov

On Fri, Aug 20, 2010 at 12:29:00PM -0700, Paul E. McKenney wrote:
> On Fri, Aug 20, 2010 at 10:59:19PM +0900, Simon Horman wrote:
> > On Fri, Aug 20, 2010 at 10:33:21PM +0900, Simon Horman wrote:
> > > Signed-off-by: Simon Horman <horms@verge.net.au>

[ snip ]

> > > @@ -223,8 +223,8 @@ int unregister_ip_vs_scheduler(struct ip
> > >  	/*
> > >  	 *	Remove it from the d-linked scheduler list
> > >  	 */
> > > -	list_del(&scheduler->n_list);
> > > -	write_unlock_bh(&__ip_vs_sched_lock);
> > > +	list_del_rcu(&scheduler->n_list);
> > > +	spin_unlock_bh(&ip_vs_sched_mutex);
> > 
> > On further reading, I believe that I need a synchronize_rcu() here.
> 
> Good catch!

:-)

> However, you actually need synchronize_rcu_bh() to match your
> rcu_read_lock_bh() calls.  Also, given Julian's comment, you probably
> need something to show that this conversion is a real improvement.

Thanks.  As suggested by Julian and others, I've decided to just
use a spinlock and not use RCU for this.



* Re: [rfc] IPVS: convert scheduler management to RCU
  2010-08-20 18:03 ` Julian Anastasov
@ 2010-08-21  3:30   ` Simon Horman
  0 siblings, 0 replies; 16+ messages in thread
From: Simon Horman @ 2010-08-21  3:30 UTC (permalink / raw)
  To: Julian Anastasov
  Cc: lvs-devel, netdev, netfilter-devel, Stephen Hemminger,
	Wensong Zhang

On Fri, Aug 20, 2010 at 09:03:03PM +0300, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Fri, 20 Aug 2010, Simon Horman wrote:
> 
> > Signed-off-by: Simon Horman <horms@verge.net.au>
> > 
> > --- 
> > 
> > I'm still getting my head around RCU, so review would be greatly appreciated.
> > 
> > It occurs to me that this code is not performance critical, so
> > perhaps simply replacing the rwlock with a spinlock would be better?
> 
> 	This specific code does not need RCU conversion, see below

Agreed.

> > Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
> > ===================================================================
> > --- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:01.000000000 +0900
> > +++ nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c	2010-08-20 22:21:51.000000000 +0900
> > @@ -35,7 +35,7 @@
> >  static LIST_HEAD(ip_vs_schedulers);
> >  
> >  /* lock for service table */
> > -static DEFINE_RWLOCK(__ip_vs_sched_lock);
> > +static DEFINE_SPINLOCK(ip_vs_sched_mutex);
> 
> 	Here is what I got as a list of locking points:
> 
> __ip_vs_conntbl_lock_array:
> 	- can benefit from RCU, main benefits come from here
> 
> - ip_vs_conn_unhash() followed by ip_vs_conn_hash() is tricky with RCU,
> 	needs more thinking, eg. when cport is changed
> 
> cp->lock, cp->refcnt:
> 	- not a problem
> 
> tcp_app_lock, udp_app_lock, sctp_app_lock:
> 	- can benefit from RCU (once per connection)
> 
> svc->sched_lock:
> 	- only 1 read_lock, mostly writers that need exclusive access
> 	- so, not suitable for RCU, can be switched to spin_lock for speed
> 
> __ip_vs_sched_lock:
> 	- not called by packet handlers, no need for RCU
> 	- used only by one ip_vs_ctl user (configuration) and the
> 	scheduler modules
> 	- can remain RWLOCK, no changes in locking are needed
> 
> __ip_vs_svc_lock:
> 	- spin_lock, use RCU
> 	- restrictions for schedulers with .update_service method
> 	because svc->sched_lock is write locked, see below
> 
> __ip_vs_rs_lock:
> 	- spin_lock, use RCU
> 
> Schedulers:
> 	- every .schedule method has its own locking, two examples:
> 		- write_lock: to protect the scheduler state (can be
> 		changed to spin_lock), see WRR. Difficult for RCU.
> 		- no lock: relies on IP_VS_WAIT_WHILE, no state
> 		is protected explicitly, fast like RCU, see WLC
> 
> Scheduler state, eg. mark->cl:
> 	- careful RCU assignment, may be all .update_service methods
> 	should use copy-on-update (WRR). OTOH, ip_vs_wlc_schedule (WLC)
> 	has no locks at all, thanks to the IP_VS_WAIT_WHILE, so
> 	it is as fast as RCU.
> 
> Statistics:
> dest->stats.lock, svc->stats.lock, ip_vs_stats.lock:
> 	- called for every packet, BAD for SMP, see ip_vs_in_stats(),
> 	ip_vs_out_stats(), ip_vs_conn_stats()
> 
> curr_sb_lock:
> 	- called for every packet depending on conn state
> 	- No benefits from RCU, should be spin_lock
> 
> 	To summarize:
> 
> - the main problem remains stats:
> 	dest->stats.lock, svc->stats.lock, ip_vs_stats.lock
> 
> - RCU benefits when a connection processes many packets, eg.
> 	for TCP, SCTP, not so much for UDP. No gains for the 1st
> 	packet in a connection.
> 
> - svc: no benefits from RCU, some schedulers protect state and
> need exclusive access, others have no state (and they do not use
> locks even now)

Thanks for the list. It looks like a good basis for some conversion work.


