netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] bonding: rejoin multicast groups on VLANs
@ 2010-09-29  7:12 Flavio Leitner
  2010-09-29 13:17 ` Flavio Leitner
  2010-09-29 18:44 ` [PATCH] bonding: rejoin multicast groups on VLANs Andy Gospodarek
  0 siblings, 2 replies; 17+ messages in thread
From: Flavio Leitner @ 2010-09-29  7:12 UTC (permalink / raw)
  To: netdev; +Cc: Flavio Leitner

It fixes bonding to rejoin multicast groups added
to VLAN devices on top of bonding when a failover
happens.

The first packet may be discarded, so the timer
assure that at least 3 Reports are sent.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 drivers/net/bonding/bond_main.c |   59 +++++++++++++++++++++++++++++++++-----
 drivers/net/bonding/bonding.h   |    2 +
 2 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3b16f62..a23a5fa 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -865,18 +865,14 @@ static void bond_mc_del(struct bonding *bond, void *addr)
 }
 
 
-/*
- * Retrieve the list of registered multicast addresses for the bonding
- * device and retransmit an IGMP JOIN request to the current active
- * slave.
- */
-static void bond_resend_igmp_join_requests(struct bonding *bond)
+static void __bond_resend_igmp_join_requests(struct net_device *dev)
 {
 	struct in_device *in_dev;
 	struct ip_mc_list *im;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(bond->dev);
+
+	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev) {
 		for (im = in_dev->mc_list; im; im = im->next)
 			ip_mc_rejoin_group(im);
@@ -885,6 +881,42 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 	rcu_read_unlock();
 }
 
+
+/*
+ * Retrieve the list of registered multicast addresses for the bonding
+ * device and retransmit an IGMP JOIN request to the current active
+ * slave.
+ */
+static void bond_resend_igmp_join_requests(struct bonding *bond)
+{
+	struct net_device *vlan_dev;
+	struct vlan_entry *vlan;
+
+	read_lock(&bond->lock);
+	if (bond->kill_timers)
+		goto out;
+
+	/* rejoin all groups on bond device */
+	__bond_resend_igmp_join_requests(bond->dev);
+
+	if (!bond->vlgrp)
+		goto reschedule;
+
+	/* rejoin all groups on vlan devices */
+	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
+		if (vlan_dev)
+			__bond_resend_igmp_join_requests(vlan_dev);
+	}
+
+reschedule:
+	if (--bond->resend_igmp > 0)
+		mod_timer(&bond->mc_timer, jiffies + HZ/5);
+
+out:
+	read_unlock(&bond->lock);
+}
+
 /*
  * flush all members of flush->mc_list from device dev->mc_list
  */
@@ -944,7 +976,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 
 		netdev_for_each_mc_addr(ha, bond->dev)
 			dev_mc_add(new_active->dev, ha->addr);
-		bond_resend_igmp_join_requests(bond);
+
+		/* rejoin multicast groups */
+		bond->resend_igmp = 3;
+		mod_timer(&bond->mc_timer, jiffies + 1);
 	}
 }
 
@@ -3741,9 +3776,15 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
 static int bond_open(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
+	struct timer_list *mc_timer = &bond->mc_timer;
 
 	bond->kill_timers = 0;
 
+	/* multicast */
+	init_timer(mc_timer);
+	mc_timer->data = (unsigned long)bond;
+	mc_timer->function = (void *)&bond_resend_igmp_join_requests;
+
 	if (bond_is_lb(bond)) {
 		/* bond_alb_initialize must be called before the timer
 		 * is started.
@@ -3808,6 +3849,8 @@ static int bond_close(struct net_device *bond_dev)
 
 	write_unlock_bh(&bond->lock);
 
+	del_timer_sync(&bond->mc_timer);
+
 	if (bond->params.miimon) {  /* link check interval, in milliseconds. */
 		cancel_delayed_work(&bond->mii_work);
 	}
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c6fdd85..5fd4164 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -198,6 +198,8 @@ struct bonding {
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	rwlock_t lock;
 	rwlock_t curr_slave_lock;
+	struct   timer_list mc_timer;
+	s8       resend_igmp;
 	s8       kill_timers;
 	s8	 send_grat_arp;
 	s8	 send_unsol_na;
-- 
1.7.2.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] bonding: rejoin multicast groups on VLANs
  2010-09-29  7:12 [PATCH] bonding: rejoin multicast groups on VLANs Flavio Leitner
@ 2010-09-29 13:17 ` Flavio Leitner
  2010-09-30 20:45   ` [PATCH v2] " Flavio Leitner
  2010-09-30 20:46   ` [PATCH] " Flavio Leitner
  2010-09-29 18:44 ` [PATCH] bonding: rejoin multicast groups on VLANs Andy Gospodarek
  1 sibling, 2 replies; 17+ messages in thread
From: Flavio Leitner @ 2010-09-29 13:17 UTC (permalink / raw)
  To: netdev; +Cc: bonding-devel, Jay Vosburgh


forgot CC to bonding-devel

On Wed, Sep 29, 2010 at 04:12:24AM -0300, Flavio Leitner wrote:
> It fixes bonding to rejoin multicast groups added
> to VLAN devices on top of bonding when a failover
> happens.
> 
> The first packet may be discarded, so the timer
> assure that at least 3 Reports are sent.
> 
> Signed-off-by: Flavio Leitner <fleitner@redhat.com>
> ---
>  drivers/net/bonding/bond_main.c |   59 +++++++++++++++++++++++++++++++++-----
>  drivers/net/bonding/bonding.h   |    2 +
>  2 files changed, 53 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 3b16f62..a23a5fa 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -865,18 +865,14 @@ static void bond_mc_del(struct bonding *bond, void *addr)
>  }
>  
>  
> -/*
> - * Retrieve the list of registered multicast addresses for the bonding
> - * device and retransmit an IGMP JOIN request to the current active
> - * slave.
> - */
> -static void bond_resend_igmp_join_requests(struct bonding *bond)
> +static void __bond_resend_igmp_join_requests(struct net_device *dev)
>  {
>  	struct in_device *in_dev;
>  	struct ip_mc_list *im;
>  
>  	rcu_read_lock();
> -	in_dev = __in_dev_get_rcu(bond->dev);
> +
> +	in_dev = __in_dev_get_rcu(dev);
>  	if (in_dev) {
>  		for (im = in_dev->mc_list; im; im = im->next)
>  			ip_mc_rejoin_group(im);
> @@ -885,6 +881,42 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
>  	rcu_read_unlock();
>  }
>  
> +
> +/*
> + * Retrieve the list of registered multicast addresses for the bonding
> + * device and retransmit an IGMP JOIN request to the current active
> + * slave.
> + */
> +static void bond_resend_igmp_join_requests(struct bonding *bond)
> +{
> +	struct net_device *vlan_dev;
> +	struct vlan_entry *vlan;
> +
> +	read_lock(&bond->lock);
> +	if (bond->kill_timers)
> +		goto out;
> +
> +	/* rejoin all groups on bond device */
> +	__bond_resend_igmp_join_requests(bond->dev);
> +
> +	if (!bond->vlgrp)
> +		goto reschedule;
> +
> +	/* rejoin all groups on vlan devices */
> +	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
> +		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
> +		if (vlan_dev)
> +			__bond_resend_igmp_join_requests(vlan_dev);
> +	}
> +
> +reschedule:
> +	if (--bond->resend_igmp > 0)
> +		mod_timer(&bond->mc_timer, jiffies + HZ/5);
> +
> +out:
> +	read_unlock(&bond->lock);
> +}
> +
>  /*
>   * flush all members of flush->mc_list from device dev->mc_list
>   */
> @@ -944,7 +976,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
>  
>  		netdev_for_each_mc_addr(ha, bond->dev)
>  			dev_mc_add(new_active->dev, ha->addr);
> -		bond_resend_igmp_join_requests(bond);
> +
> +		/* rejoin multicast groups */
> +		bond->resend_igmp = 3;
> +		mod_timer(&bond->mc_timer, jiffies + 1);
>  	}
>  }
>  
> @@ -3741,9 +3776,15 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
>  static int bond_open(struct net_device *bond_dev)
>  {
>  	struct bonding *bond = netdev_priv(bond_dev);
> +	struct timer_list *mc_timer = &bond->mc_timer;
>  
>  	bond->kill_timers = 0;
>  
> +	/* multicast */
> +	init_timer(mc_timer);
> +	mc_timer->data = (unsigned long)bond;
> +	mc_timer->function = (void *)&bond_resend_igmp_join_requests;
> +
>  	if (bond_is_lb(bond)) {
>  		/* bond_alb_initialize must be called before the timer
>  		 * is started.
> @@ -3808,6 +3849,8 @@ static int bond_close(struct net_device *bond_dev)
>  
>  	write_unlock_bh(&bond->lock);
>  
> +	del_timer_sync(&bond->mc_timer);
> +
>  	if (bond->params.miimon) {  /* link check interval, in milliseconds. */
>  		cancel_delayed_work(&bond->mii_work);
>  	}
> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
> index c6fdd85..5fd4164 100644
> --- a/drivers/net/bonding/bonding.h
> +++ b/drivers/net/bonding/bonding.h
> @@ -198,6 +198,8 @@ struct bonding {
>  	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
>  	rwlock_t lock;
>  	rwlock_t curr_slave_lock;
> +	struct   timer_list mc_timer;
> +	s8       resend_igmp;
>  	s8       kill_timers;
>  	s8	 send_grat_arp;
>  	s8	 send_unsol_na;
> -- 
> 1.7.2.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Flavio

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] bonding: rejoin multicast groups on VLANs
  2010-09-29  7:12 [PATCH] bonding: rejoin multicast groups on VLANs Flavio Leitner
  2010-09-29 13:17 ` Flavio Leitner
@ 2010-09-29 18:44 ` Andy Gospodarek
  2010-09-29 19:35   ` Flavio Leitner
  1 sibling, 1 reply; 17+ messages in thread
From: Andy Gospodarek @ 2010-09-29 18:44 UTC (permalink / raw)
  To: Flavio Leitner; +Cc: netdev

On Wed, Sep 29, 2010 at 04:12:24AM -0300, Flavio Leitner wrote:
> It fixes bonding to rejoin multicast groups added
> to VLAN devices on top of bonding when a failover
> happens.
> 
> The first packet may be discarded, so the timer
> assure that at least 3 Reports are sent.
> 

Good find, Flavio.  Clearly the fact that multicast membership is broken
needs to be fixed, but I would rather not see timers used at all.  We
worked hard in the past to eliminate timers for several reasons, so I
would rather see a workqueue used.

I also don't like retransmitting the membership report 3 times when it
may not be needed.  Though many switches can handle it, the cost of
receiving and processing what might be a large list of multicast
addresses every 200ms for 600ms doesn't seem ideal.  It also feels like
a hack. :)

If retransmission of the membership reports is a requirement for some
users, I would rather see it as a configuration option.

Maybe something like this?

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3b16f62..b7b4a74 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -109,6 +109,7 @@ static char *arp_validate;
 static char *fail_over_mac;
 static int all_slaves_active = 0;
 static struct bond_params bonding_defaults;
+static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
 
 module_param(max_bonds, int, 0);
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
@@ -163,6 +164,8 @@ module_param(all_slaves_active, int, 0);
 MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface"
 				     "by setting active flag for all slaves.  "
 				     "0 for never (default), 1 for always.");
+module_param(resend_igmp, int, 0);
+MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
 
 /*----------------------------- Global variables ----------------------------*/
 
@@ -865,18 +868,14 @@ static void bond_mc_del(struct bonding *bond, void *addr)
 }
 
 
-/*
- * Retrieve the list of registered multicast addresses for the bonding
- * device and retransmit an IGMP JOIN request to the current active
- * slave.
- */
-static void bond_resend_igmp_join_requests(struct bonding *bond)
+static void __bond_resend_igmp_join_requests(struct net_device *dev)
 {
 	struct in_device *in_dev;
 	struct ip_mc_list *im;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(bond->dev);
+
+	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev) {
 		for (im = in_dev->mc_list; im; im = im->next)
 			ip_mc_rejoin_group(im);
@@ -885,6 +884,48 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 	rcu_read_unlock();
 }
 
+
+/*
+ * Retrieve the list of registered multicast addresses for the bonding
+ * device and retransmit an IGMP JOIN request to the current active
+ * slave.
+ */
+static void bond_resend_igmp_join_requests(struct bonding *bond)
+{
+	struct net_device *vlan_dev;
+	struct vlan_entry *vlan;
+
+	read_lock(&bond->lock);
+	if (bond->kill_timers)
+		goto out;
+
+	/* rejoin all groups on bond device */
+	__bond_resend_igmp_join_requests(bond->dev);
+
+	/* rejoin all groups on vlan devices */
+	if (bond->vlgrp) {
+		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+			vlan_dev = vlan_group_get_device(bond->vlgrp,
+							 vlan->vlan_id);
+			if (vlan_dev)
+				__bond_resend_igmp_join_requests(vlan_dev);
+		}
+	}
+
+	if (--bond->igmp_retrans > 0)
+		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
+
+out:
+	read_unlock(&bond->lock);
+}
+
+void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
+{
+	struct bonding *bond = container_of(work, struct bonding,
+					    mcast_work.work);
+	bond_resend_igmp_join_requests(bond);
+}
+
 /*
  * flush all members of flush->mc_list from device dev->mc_list
  */
@@ -944,7 +985,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 
 		netdev_for_each_mc_addr(ha, bond->dev)
 			dev_mc_add(new_active->dev, ha->addr);
-		bond_resend_igmp_join_requests(bond);
+
+		/* rejoin multicast groups */
+		bond->igmp_retrans = bond->params.resend_igmp;
+		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
 	}
 }
 
@@ -3744,6 +3788,9 @@ static int bond_open(struct net_device *bond_dev)
 
 	bond->kill_timers = 0;
 
+	/* multicast retrans */
+	INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
+
 	if (bond_is_lb(bond)) {
 		/* bond_alb_initialize must be called before the timer
 		 * is started.
@@ -3828,6 +3875,8 @@ static int bond_close(struct net_device *bond_dev)
 		break;
 	}
 
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->ad_work);
 
 	if (bond_is_lb(bond)) {
 		/* Must be called only after all
@@ -4699,6 +4748,9 @@ static void bond_work_cancel_all(struct bonding *bond)
 	if (bond->params.mode == BOND_MODE_8023AD &&
 	    delayed_work_pending(&bond->ad_work))
 		cancel_delayed_work(&bond->ad_work);
+
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->ad_work);
 }
 
 /*
@@ -4891,6 +4943,12 @@ static int bond_check_params(struct bond_params *params)
 		all_slaves_active = 0;
 	}
 
+	if (resend_igmp < 0 || resend_igmp > 255) {
+		pr_warning("Warning: resend_igmp (%d) should be between "
+			   "0 and 255, resetting to %d\n",
+			   resend_igmp, BOND_DEFAULT_RESEND_IGMP);
+		resend_igmp = BOND_DEFAULT_RESEND_IGMP;
+	}
 	/* reset values for TLB/ALB */
 	if ((bond_mode == BOND_MODE_TLB) ||
 	    (bond_mode == BOND_MODE_ALB)) {
@@ -5063,6 +5121,7 @@ static int bond_check_params(struct bond_params *params)
 	params->fail_over_mac = fail_over_mac_value;
 	params->tx_queues = tx_queues;
 	params->all_slaves_active = all_slaves_active;
+	params->resend_igmp = resend_igmp;
 
 	if (primary) {
 		strncpy(params->primary, primary, IFNAMSIZ);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c6fdd85..c49bdb0 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -136,6 +136,7 @@ struct bond_params {
 	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
 	int tx_queues;
 	int all_slaves_active;
+	int resend_igmp;
 };
 
 struct bond_parm_tbl {
@@ -198,6 +199,7 @@ struct bonding {
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	rwlock_t lock;
 	rwlock_t curr_slave_lock;
+	s8       igmp_retrans;
 	s8       kill_timers;
 	s8	 send_grat_arp;
 	s8	 send_unsol_na;
@@ -223,6 +225,7 @@ struct bonding {
 	struct   delayed_work arp_work;
 	struct   delayed_work alb_work;
 	struct   delayed_work ad_work;
+	struct   delayed_work mcast_work;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct   in6_addr master_ipv6;
 #endif
diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index 2c79943..d2f78b7 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -84,6 +84,9 @@
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
 #define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
+
+#define BOND_DEFAULT_RESEND_IGMP	1 /* Default number of IGMP membership reports
+					     to resend on link failure. */
 /* hashing types */
 #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
 #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] bonding: rejoin multicast groups on VLANs
  2010-09-29 18:44 ` [PATCH] bonding: rejoin multicast groups on VLANs Andy Gospodarek
@ 2010-09-29 19:35   ` Flavio Leitner
  2010-09-29 19:54     ` Andy Gospodarek
  0 siblings, 1 reply; 17+ messages in thread
From: Flavio Leitner @ 2010-09-29 19:35 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: netdev

On Wed, Sep 29, 2010 at 02:44:13PM -0400, Andy Gospodarek wrote:
> On Wed, Sep 29, 2010 at 04:12:24AM -0300, Flavio Leitner wrote:
> > It fixes bonding to rejoin multicast groups added
> > to VLAN devices on top of bonding when a failover
> > happens.
> > 
> > The first packet may be discarded, so the timer
> > assure that at least 3 Reports are sent.
> > 
> 
> Good find, Flavio.  Clearly the fact that multicast membership is broken
> needs to be fixed, but I would rather not see timers used at all.  We
> worked hard in the past to eliminate timers for several reasons, so I
> would rather see a workqueue used.

I noticed that the code is using workqueues now, just thought a
simple thing which may run couple times would fit perfectly with
a simple timer.


> I also don't like retransmitting the membership report 3 times when it
> may not be needed.  Though many switches can handle it, the cost of
> receiving and processing what might be a large list of multicast
> addresses every 200ms for 600ms doesn't seem ideal.  It also feels like
> a hack. :)

Definitely a parameter is much better, but I wasn't sure about
the patch approach so I was expecting a review like this and then
do the refinements needed. Better to post early, right? :)

I see your point to change the default to one membership report,
but we can't assure during a failover if everything has been
received. Also, it isn't supposed to keep failing flooding the
network, so I would rather have couple membership reports being
send than watch an important multicast application failing.

Perhaps 3 is too much, but one sounds too few to me.

what you think?

Flavio

> 
> If retransmission of the membership reports is a requirement for some
> users, I would rather see it as a configuration option.
> 
> Maybe something like this?
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 3b16f62..b7b4a74 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -109,6 +109,7 @@ static char *arp_validate;
>  static char *fail_over_mac;
>  static int all_slaves_active = 0;
>  static struct bond_params bonding_defaults;
> +static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
>  
>  module_param(max_bonds, int, 0);
>  MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
> @@ -163,6 +164,8 @@ module_param(all_slaves_active, int, 0);
>  MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface"
>  				     "by setting active flag for all slaves.  "
>  				     "0 for never (default), 1 for always.");
> +module_param(resend_igmp, int, 0);
> +MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
>  
>  /*----------------------------- Global variables ----------------------------*/
>  
> @@ -865,18 +868,14 @@ static void bond_mc_del(struct bonding *bond, void *addr)
>  }
>  
>  
> -/*
> - * Retrieve the list of registered multicast addresses for the bonding
> - * device and retransmit an IGMP JOIN request to the current active
> - * slave.
> - */
> -static void bond_resend_igmp_join_requests(struct bonding *bond)
> +static void __bond_resend_igmp_join_requests(struct net_device *dev)
>  {
>  	struct in_device *in_dev;
>  	struct ip_mc_list *im;
>  
>  	rcu_read_lock();
> -	in_dev = __in_dev_get_rcu(bond->dev);
> +
> +	in_dev = __in_dev_get_rcu(dev);
>  	if (in_dev) {
>  		for (im = in_dev->mc_list; im; im = im->next)
>  			ip_mc_rejoin_group(im);
> @@ -885,6 +884,48 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
>  	rcu_read_unlock();
>  }
>  
> +
> +/*
> + * Retrieve the list of registered multicast addresses for the bonding
> + * device and retransmit an IGMP JOIN request to the current active
> + * slave.
> + */
> +static void bond_resend_igmp_join_requests(struct bonding *bond)
> +{
> +	struct net_device *vlan_dev;
> +	struct vlan_entry *vlan;
> +
> +	read_lock(&bond->lock);
> +	if (bond->kill_timers)
> +		goto out;
> +
> +	/* rejoin all groups on bond device */
> +	__bond_resend_igmp_join_requests(bond->dev);
> +
> +	/* rejoin all groups on vlan devices */
> +	if (bond->vlgrp) {
> +		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
> +			vlan_dev = vlan_group_get_device(bond->vlgrp,
> +							 vlan->vlan_id);
> +			if (vlan_dev)
> +				__bond_resend_igmp_join_requests(vlan_dev);
> +		}
> +	}
> +
> +	if (--bond->igmp_retrans > 0)
> +		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
> +
> +out:
> +	read_unlock(&bond->lock);
> +}
> +
> +void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
> +{
> +	struct bonding *bond = container_of(work, struct bonding,
> +					    mcast_work.work);
> +	bond_resend_igmp_join_requests(bond);
> +}
> +
>  /*
>   * flush all members of flush->mc_list from device dev->mc_list
>   */
> @@ -944,7 +985,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
>  
>  		netdev_for_each_mc_addr(ha, bond->dev)
>  			dev_mc_add(new_active->dev, ha->addr);
> -		bond_resend_igmp_join_requests(bond);
> +
> +		/* rejoin multicast groups */
> +		bond->igmp_retrans = bond->params.resend_igmp;
> +		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
>  	}
>  }
>  
> @@ -3744,6 +3788,9 @@ static int bond_open(struct net_device *bond_dev)
>  
>  	bond->kill_timers = 0;
>  
> +	/* multicast retrans */
> +	INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
> +
>  	if (bond_is_lb(bond)) {
>  		/* bond_alb_initialize must be called before the timer
>  		 * is started.
> @@ -3828,6 +3875,8 @@ static int bond_close(struct net_device *bond_dev)
>  		break;
>  	}
>  
> +	if (delayed_work_pending(&bond->mcast_work))
> +		cancel_delayed_work(&bond->ad_work);
>  
>  	if (bond_is_lb(bond)) {
>  		/* Must be called only after all
> @@ -4699,6 +4748,9 @@ static void bond_work_cancel_all(struct bonding *bond)
>  	if (bond->params.mode == BOND_MODE_8023AD &&
>  	    delayed_work_pending(&bond->ad_work))
>  		cancel_delayed_work(&bond->ad_work);
> +
> +	if (delayed_work_pending(&bond->mcast_work))
> +		cancel_delayed_work(&bond->ad_work);
>  }
>  
>  /*
> @@ -4891,6 +4943,12 @@ static int bond_check_params(struct bond_params *params)
>  		all_slaves_active = 0;
>  	}
>  
> +	if (resend_igmp < 0 || resend_igmp > 255) {
> +		pr_warning("Warning: resend_igmp (%d) should be between "
> +			   "0 and 255, resetting to %d\n",
> +			   resend_igmp, BOND_DEFAULT_RESEND_IGMP);
> +		resend_igmp = BOND_DEFAULT_RESEND_IGMP;
> +	}
>  	/* reset values for TLB/ALB */
>  	if ((bond_mode == BOND_MODE_TLB) ||
>  	    (bond_mode == BOND_MODE_ALB)) {
> @@ -5063,6 +5121,7 @@ static int bond_check_params(struct bond_params *params)
>  	params->fail_over_mac = fail_over_mac_value;
>  	params->tx_queues = tx_queues;
>  	params->all_slaves_active = all_slaves_active;
> +	params->resend_igmp = resend_igmp;
>  
>  	if (primary) {
>  		strncpy(params->primary, primary, IFNAMSIZ);
> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
> index c6fdd85..c49bdb0 100644
> --- a/drivers/net/bonding/bonding.h
> +++ b/drivers/net/bonding/bonding.h
> @@ -136,6 +136,7 @@ struct bond_params {
>  	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
>  	int tx_queues;
>  	int all_slaves_active;
> +	int resend_igmp;
>  };
>  
>  struct bond_parm_tbl {
> @@ -198,6 +199,7 @@ struct bonding {
>  	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
>  	rwlock_t lock;
>  	rwlock_t curr_slave_lock;
> +	s8       igmp_retrans;
>  	s8       kill_timers;
>  	s8	 send_grat_arp;
>  	s8	 send_unsol_na;
> @@ -223,6 +225,7 @@ struct bonding {
>  	struct   delayed_work arp_work;
>  	struct   delayed_work alb_work;
>  	struct   delayed_work ad_work;
> +	struct   delayed_work mcast_work;
>  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
>  	struct   in6_addr master_ipv6;
>  #endif
> diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
> index 2c79943..d2f78b7 100644
> --- a/include/linux/if_bonding.h
> +++ b/include/linux/if_bonding.h
> @@ -84,6 +84,9 @@
>  #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
>  
>  #define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
> +
> +#define BOND_DEFAULT_RESEND_IGMP	1 /* Default number of IGMP membership reports
> +					     to resend on link failure. */
>  /* hashing types */
>  #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
>  #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */

-- 
Flavio

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] bonding: rejoin multicast groups on VLANs
  2010-09-29 19:35   ` Flavio Leitner
@ 2010-09-29 19:54     ` Andy Gospodarek
  2010-09-29 20:38       ` Flavio Leitner
  0 siblings, 1 reply; 17+ messages in thread
From: Andy Gospodarek @ 2010-09-29 19:54 UTC (permalink / raw)
  To: Flavio Leitner; +Cc: netdev

On Wed, Sep 29, 2010 at 04:35:39PM -0300, Flavio Leitner wrote:
> On Wed, Sep 29, 2010 at 02:44:13PM -0400, Andy Gospodarek wrote:
> > On Wed, Sep 29, 2010 at 04:12:24AM -0300, Flavio Leitner wrote:
> > > It fixes bonding to rejoin multicast groups added
> > > to VLAN devices on top of bonding when a failover
> > > happens.
> > > 
> > > The first packet may be discarded, so the timer
> > > assure that at least 3 Reports are sent.
> > > 
> > 
> > Good find, Flavio.  Clearly the fact that multicast membership is broken
> > needs to be fixed, but I would rather not see timers used at all.  We
> > worked hard in the past to eliminate timers for several reasons, so I
> > would rather see a workqueue used.
> 
> I noticed that the code is using workqueues now, just thought a
> simple thing which may run couple times would fit perfectly with
> a simple timer.
> 

Timers runs in softirq context, so I'd rather not add code that takes
locks and runs in softirq context.

> 
> > I also don't like retransmitting the membership report 3 times when it
> > may not be needed.  Though many switches can handle it, the cost of
> > receiving and processing what might be a large list of multicast
> > addresses every 200ms for 600ms doesn't seem ideal.  It also feels like
> > a hack. :)
> 
> Definitely a parameter is much better, but I wasn't sure about
> the patch approach so I was expecting a review like this and then
> do the refinements needed. Better to post early, right? :)
> 
> I see your point to change the default to one membership report,
> but we can't assure during a failover if everything has been
> received. Also, it isn't supposed to keep failing flooding the
> network, so I would rather have couple membership reports being
> send than watch an important multicast application failing.
> 
> Perhaps 3 is too much, but one sounds too few to me.
> 
> what you think?
> 

Adding a tunable parameter allows the administrator to decide how many
is enough.  I would rather keep the default at one and add the tunable
parameter (which needs to be added to bond_sysfs.c to be effective).

I have not heard loud complaints about only sending one since the code
to send retransmits of membership reports was added a few years ago, so
I'm inclined to think it is working well for most users (or no one is
using bonding).

Maybe it would be best to break this into 2 patches.  One that simply
fixes the failover code so it works with VLANs (that could be done
easily today) and another patch that can add the code to send multiple
retransmits.  Would you be willing to do that?

> Flavio
> 
> > 
> > If retransmission of the membership reports is a requirement for some
> > users, I would rather see it as a configuration option.
> > 
> > Maybe something like this?
> > 
> > diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> > index 3b16f62..b7b4a74 100644
> > --- a/drivers/net/bonding/bond_main.c
> > +++ b/drivers/net/bonding/bond_main.c
> > @@ -109,6 +109,7 @@ static char *arp_validate;
> >  static char *fail_over_mac;
> >  static int all_slaves_active = 0;
> >  static struct bond_params bonding_defaults;
> > +static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
> >  
> >  module_param(max_bonds, int, 0);
> >  MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
> > @@ -163,6 +164,8 @@ module_param(all_slaves_active, int, 0);
> >  MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface"
> >  				     "by setting active flag for all slaves.  "
> >  				     "0 for never (default), 1 for always.");
> > +module_param(resend_igmp, int, 0);
> > +MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
> >  
> >  /*----------------------------- Global variables ----------------------------*/
> >  
> > @@ -865,18 +868,14 @@ static void bond_mc_del(struct bonding *bond, void *addr)
> >  }
> >  
> >  
> > -/*
> > - * Retrieve the list of registered multicast addresses for the bonding
> > - * device and retransmit an IGMP JOIN request to the current active
> > - * slave.
> > - */
> > -static void bond_resend_igmp_join_requests(struct bonding *bond)
> > +static void __bond_resend_igmp_join_requests(struct net_device *dev)
> >  {
> >  	struct in_device *in_dev;
> >  	struct ip_mc_list *im;
> >  
> >  	rcu_read_lock();
> > -	in_dev = __in_dev_get_rcu(bond->dev);
> > +
> > +	in_dev = __in_dev_get_rcu(dev);
> >  	if (in_dev) {
> >  		for (im = in_dev->mc_list; im; im = im->next)
> >  			ip_mc_rejoin_group(im);
> > @@ -885,6 +884,48 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
> >  	rcu_read_unlock();
> >  }
> >  
> > +
> > +/*
> > + * Retrieve the list of registered multicast addresses for the bonding
> > + * device and retransmit an IGMP JOIN request to the current active
> > + * slave.
> > + */
> > +static void bond_resend_igmp_join_requests(struct bonding *bond)
> > +{
> > +	struct net_device *vlan_dev;
> > +	struct vlan_entry *vlan;
> > +
> > +	read_lock(&bond->lock);
> > +	if (bond->kill_timers)
> > +		goto out;
> > +
> > +	/* rejoin all groups on bond device */
> > +	__bond_resend_igmp_join_requests(bond->dev);
> > +
> > +	/* rejoin all groups on vlan devices */
> > +	if (bond->vlgrp) {
> > +		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
> > +			vlan_dev = vlan_group_get_device(bond->vlgrp,
> > +							 vlan->vlan_id);
> > +			if (vlan_dev)
> > +				__bond_resend_igmp_join_requests(vlan_dev);
> > +		}
> > +	}
> > +
> > +	if (--bond->igmp_retrans > 0)
> > +		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
> > +
> > +out:
> > +	read_unlock(&bond->lock);
> > +}
> > +
> > +void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
> > +{
> > +	struct bonding *bond = container_of(work, struct bonding,
> > +					    mcast_work.work);
> > +	bond_resend_igmp_join_requests(bond);
> > +}
> > +
> >  /*
> >   * flush all members of flush->mc_list from device dev->mc_list
> >   */
> > @@ -944,7 +985,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
> >  
> >  		netdev_for_each_mc_addr(ha, bond->dev)
> >  			dev_mc_add(new_active->dev, ha->addr);
> > -		bond_resend_igmp_join_requests(bond);
> > +
> > +		/* rejoin multicast groups */
> > +		bond->igmp_retrans = bond->params.resend_igmp;
> > +		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
> >  	}
> >  }
> >  
> > @@ -3744,6 +3788,9 @@ static int bond_open(struct net_device *bond_dev)
> >  
> >  	bond->kill_timers = 0;
> >  
> > +	/* multicast retrans */
> > +	INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
> > +
> >  	if (bond_is_lb(bond)) {
> >  		/* bond_alb_initialize must be called before the timer
> >  		 * is started.
> > @@ -3828,6 +3875,8 @@ static int bond_close(struct net_device *bond_dev)
> >  		break;
> >  	}
> >  
> > +	if (delayed_work_pending(&bond->mcast_work))
> > +		cancel_delayed_work(&bond->ad_work);
> >  
> >  	if (bond_is_lb(bond)) {
> >  		/* Must be called only after all
> > @@ -4699,6 +4748,9 @@ static void bond_work_cancel_all(struct bonding *bond)
> >  	if (bond->params.mode == BOND_MODE_8023AD &&
> >  	    delayed_work_pending(&bond->ad_work))
> >  		cancel_delayed_work(&bond->ad_work);
> > +
> > +	if (delayed_work_pending(&bond->mcast_work))
> > +		cancel_delayed_work(&bond->ad_work);
> >  }
> >  
> >  /*
> > @@ -4891,6 +4943,12 @@ static int bond_check_params(struct bond_params *params)
> >  		all_slaves_active = 0;
> >  	}
> >  
> > +	if (resend_igmp < 0 || resend_igmp > 255) {
> > +		pr_warning("Warning: resend_igmp (%d) should be between "
> > +			   "0 and 255, resetting to %d\n",
> > +			   resend_igmp, BOND_DEFAULT_RESEND_IGMP);
> > +		resend_igmp = BOND_DEFAULT_RESEND_IGMP;
> > +	}
> >  	/* reset values for TLB/ALB */
> >  	if ((bond_mode == BOND_MODE_TLB) ||
> >  	    (bond_mode == BOND_MODE_ALB)) {
> > @@ -5063,6 +5121,7 @@ static int bond_check_params(struct bond_params *params)
> >  	params->fail_over_mac = fail_over_mac_value;
> >  	params->tx_queues = tx_queues;
> >  	params->all_slaves_active = all_slaves_active;
> > +	params->resend_igmp = resend_igmp;
> >  
> >  	if (primary) {
> >  		strncpy(params->primary, primary, IFNAMSIZ);
> > diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
> > index c6fdd85..c49bdb0 100644
> > --- a/drivers/net/bonding/bonding.h
> > +++ b/drivers/net/bonding/bonding.h
> > @@ -136,6 +136,7 @@ struct bond_params {
> >  	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
> >  	int tx_queues;
> >  	int all_slaves_active;
> > +	int resend_igmp;
> >  };
> >  
> >  struct bond_parm_tbl {
> > @@ -198,6 +199,7 @@ struct bonding {
> >  	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
> >  	rwlock_t lock;
> >  	rwlock_t curr_slave_lock;
> > +	s8       igmp_retrans;
> >  	s8       kill_timers;
> >  	s8	 send_grat_arp;
> >  	s8	 send_unsol_na;
> > @@ -223,6 +225,7 @@ struct bonding {
> >  	struct   delayed_work arp_work;
> >  	struct   delayed_work alb_work;
> >  	struct   delayed_work ad_work;
> > +	struct   delayed_work mcast_work;
> >  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
> >  	struct   in6_addr master_ipv6;
> >  #endif
> > diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
> > index 2c79943..d2f78b7 100644
> > --- a/include/linux/if_bonding.h
> > +++ b/include/linux/if_bonding.h
> > @@ -84,6 +84,9 @@
> >  #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
> >  
> >  #define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
> > +
> > +#define BOND_DEFAULT_RESEND_IGMP	1 /* Default number of IGMP membership reports
> > +					     to resend on link failure. */
> >  /* hashing types */
> >  #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
> >  #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */
> 
> -- 
> Flavio
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] bonding: rejoin multicast groups on VLANs
  2010-09-29 19:54     ` Andy Gospodarek
@ 2010-09-29 20:38       ` Flavio Leitner
  0 siblings, 0 replies; 17+ messages in thread
From: Flavio Leitner @ 2010-09-29 20:38 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: netdev

On Wed, Sep 29, 2010 at 03:54:11PM -0400, Andy Gospodarek wrote:
> On Wed, Sep 29, 2010 at 04:35:39PM -0300, Flavio Leitner wrote:
> > On Wed, Sep 29, 2010 at 02:44:13PM -0400, Andy Gospodarek wrote:
> > > On Wed, Sep 29, 2010 at 04:12:24AM -0300, Flavio Leitner wrote:
> > > > It fixes bonding to rejoin multicast groups added
> > > > to VLAN devices on top of bonding when a failover
> > > > happens.
> > > > 
> > > > The first packet may be discarded, so the timer
> > > > assure that at least 3 Reports are sent.
> > > > 
> > > 
> > > Good find, Flavio.  Clearly the fact that multicast membership is broken
> > > needs to be fixed, but I would rather not see timers used at all.  We
> > > worked hard in the past to eliminate timers for several reasons, so I
> > > would rather see a workqueue used.
> > 
> > I noticed that the code is using workqueues now, just thought a
> > simple thing which may run couple times would fit perfectly with
> > a simple timer.
> > 
> 
> Timers runs in softirq context, so I'd rather not add code that takes
> locks and runs in softirq context.
> 
> > 
> > > I also don't like retransmitting the membership report 3 times when it
> > > may not be needed.  Though many switches can handle it, the cost of
> > > receiving and processing what might be a large list of multicast
> > > addresses every 200ms for 600ms doesn't seem ideal.  It also feels like
> > > a hack. :)
> > 
> > Definitely a parameter is much better, but I wasn't sure about
> > the patch approach so I was expecting a review like this and then
> > do the refinements needed. Better to post early, right? :)
> > 
> > I see your point to change the default to one membership report,
> > but we can't assure during a failover if everything has been
> > received. Also, it isn't supposed to keep failing flooding the
> > network, so I would rather have couple membership reports being
> > send than watch an important multicast application failing.
> > 
> > Perhaps 3 is too much, but one sounds too few to me.
> > 
> > what you think?
> > 
> 
> Adding a tunable parameter allows the administrator to decide how many
> is enough.  I would rather keep the default at one and add the tunable
> parameter (which needs to be added to bond_sysfs.c to be effective).
> 
> I have not heard loud complaints about only sending one since the code
> to send retransmits of membership reports was added a few years ago, so
> I'm inclined to think it is working well for most users (or no one is
> using bonding).
> 
> Maybe it would be best to break this into 2 patches.  One that simply
> fixes the failover code so it works with VLANs (that could be done
> easily today) and another patch that can add the code to send multiple
> retransmits.  Would you be willing to do that?

Sure, I can do it and then start another testing session here.

-- 
Flavio

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2] bonding: rejoin multicast groups on VLANs
  2010-09-29 13:17 ` Flavio Leitner
@ 2010-09-30 20:45   ` Flavio Leitner
  2010-10-04 13:24     ` Andy Gospodarek
  2010-09-30 20:46   ` [PATCH] " Flavio Leitner
  1 sibling, 1 reply; 17+ messages in thread
From: Flavio Leitner @ 2010-09-30 20:45 UTC (permalink / raw)
  To: netdev; +Cc: Andy Gospodarek, bonding-devel, Jay Vosburgh, Flavio Leitner

It fixes bonding to rejoin multicast groups added
to VLAN devices on top of bonding when a failover
happens.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 drivers/net/bonding/bond_main.c |   61 +++++++++++++++++++++++++++++++++-----
 drivers/net/bonding/bonding.h   |    1 +
 2 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3b16f62..1016c09 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -865,18 +865,14 @@ static void bond_mc_del(struct bonding *bond, void *addr)
 }
 
 
-/*
- * Retrieve the list of registered multicast addresses for the bonding
- * device and retransmit an IGMP JOIN request to the current active
- * slave.
- */
-static void bond_resend_igmp_join_requests(struct bonding *bond)
+static void __bond_resend_igmp_join_requests(struct net_device *dev)
 {
 	struct in_device *in_dev;
 	struct ip_mc_list *im;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(bond->dev);
+
+	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev) {
 		for (im = in_dev->mc_list; im; im = im->next)
 			ip_mc_rejoin_group(im);
@@ -885,6 +881,45 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 	rcu_read_unlock();
 }
 
+
+/*
+ * Retrieve the list of registered multicast addresses for the bonding
+ * device and retransmit an IGMP JOIN request to the current active
+ * slave.
+ */
+static void bond_resend_igmp_join_requests(struct bonding *bond)
+{
+	struct net_device *vlan_dev;
+	struct vlan_entry *vlan;
+
+	read_lock(&bond->lock);
+	if (bond->kill_timers)
+		goto out;
+
+	/* rejoin all groups on bond device */
+	__bond_resend_igmp_join_requests(bond->dev);
+
+	/* rejoin all groups on vlan devices */
+	if (bond->vlgrp) {
+		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+			vlan_dev = vlan_group_get_device(bond->vlgrp,
+							 vlan->vlan_id);
+			if (vlan_dev)
+				__bond_resend_igmp_join_requests(vlan_dev);
+		}
+	}
+
+out:
+	read_unlock(&bond->lock);
+}
+
+void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
+{
+	struct bonding *bond = container_of(work, struct bonding,
+					    mcast_work.work);
+	bond_resend_igmp_join_requests(bond);
+}
+
 /*
  * flush all members of flush->mc_list from device dev->mc_list
  */
@@ -944,7 +979,9 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 
 		netdev_for_each_mc_addr(ha, bond->dev)
 			dev_mc_add(new_active->dev, ha->addr);
-		bond_resend_igmp_join_requests(bond);
+
+		/* rejoin multicast groups */
+		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
 	}
 }
 
@@ -3744,6 +3781,9 @@ static int bond_open(struct net_device *bond_dev)
 
 	bond->kill_timers = 0;
 
+	/* multicast retrans */
+	INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
+
 	if (bond_is_lb(bond)) {
 		/* bond_alb_initialize must be called before the timer
 		 * is started.
@@ -3828,6 +3868,8 @@ static int bond_close(struct net_device *bond_dev)
 		break;
 	}
 
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->mcast_work);
 
 	if (bond_is_lb(bond)) {
 		/* Must be called only after all
@@ -4699,6 +4741,9 @@ static void bond_work_cancel_all(struct bonding *bond)
 	if (bond->params.mode == BOND_MODE_8023AD &&
 	    delayed_work_pending(&bond->ad_work))
 		cancel_delayed_work(&bond->ad_work);
+
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->mcast_work);
 }
 
 /*
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c6fdd85..308ed10 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -223,6 +223,7 @@ struct bonding {
 	struct   delayed_work arp_work;
 	struct   delayed_work alb_work;
 	struct   delayed_work ad_work;
+	struct   delayed_work mcast_work;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct   in6_addr master_ipv6;
 #endif
-- 
1.7.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH] bonding: add retransmit membership reports tunable
  2010-09-29 13:17 ` Flavio Leitner
  2010-09-30 20:45   ` [PATCH v2] " Flavio Leitner
@ 2010-09-30 20:46   ` Flavio Leitner
  1 sibling, 0 replies; 17+ messages in thread
From: Flavio Leitner @ 2010-09-30 20:46 UTC (permalink / raw)
  To: netdev; +Cc: Andy Gospodarek, bonding-devel, Jay Vosburgh, Flavio Leitner

Allow admins to adjust the number of multicast membership
reports sent on a link failure.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 drivers/net/bonding/bond_main.c  |   14 ++++++++++++
 drivers/net/bonding/bond_sysfs.c |   44 ++++++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bonding.h    |    2 +
 include/linux/if_bonding.h       |    3 ++
 4 files changed, 63 insertions(+), 0 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1016c09..60bfd33 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -109,6 +109,7 @@ static char *arp_validate;
 static char *fail_over_mac;
 static int all_slaves_active = 0;
 static struct bond_params bonding_defaults;
+static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
 
 module_param(max_bonds, int, 0);
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
@@ -163,6 +164,8 @@ module_param(all_slaves_active, int, 0);
 MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface"
 				     "by setting active flag for all slaves.  "
 				     "0 for never (default), 1 for always.");
+module_param(resend_igmp, int, 0);
+MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
 
 /*----------------------------- Global variables ----------------------------*/
 
@@ -909,6 +912,9 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 		}
 	}
 
+	if (--bond->igmp_retrans > 0)
+		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
+
 out:
 	read_unlock(&bond->lock);
 }
@@ -981,6 +987,7 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 			dev_mc_add(new_active->dev, ha->addr);
 
 		/* rejoin multicast groups */
+		bond->igmp_retrans = bond->params.resend_igmp;
 		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
 	}
 }
@@ -4936,6 +4943,12 @@ static int bond_check_params(struct bond_params *params)
 		all_slaves_active = 0;
 	}
 
+	if (resend_igmp < 0 || resend_igmp > 255) {
+		pr_warning("Warning: resend_igmp (%d) should be between "
+			   "0 and 255, resetting to %d\n",
+			   resend_igmp, BOND_DEFAULT_RESEND_IGMP);
+		resend_igmp = BOND_DEFAULT_RESEND_IGMP;
+	}
 	/* reset values for TLB/ALB */
 	if ((bond_mode == BOND_MODE_TLB) ||
 	    (bond_mode == BOND_MODE_ALB)) {
@@ -5108,6 +5121,7 @@ static int bond_check_params(struct bond_params *params)
 	params->fail_over_mac = fail_over_mac_value;
 	params->tx_queues = tx_queues;
 	params->all_slaves_active = all_slaves_active;
+	params->resend_igmp = resend_igmp;
 
 	if (primary) {
 		strncpy(params->primary, primary, IFNAMSIZ);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index c311aed..a676a28 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1592,6 +1592,49 @@ out:
 static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
 		   bonding_show_slaves_active, bonding_store_slaves_active);
 
+/*
+ * Show and set the number of IGMP membership reports to send on link failure
+ */
+static ssize_t bonding_show_resend_igmp(struct device *d,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct bonding *bond = to_bond(d);
+
+	return sprintf(buf, "%d\n", bond->params.resend_igmp);
+}
+
+static ssize_t bonding_store_resend_igmp(struct device *d,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	int new_value, ret = count;
+	struct bonding *bond = to_bond(d);
+
+	if (sscanf(buf, "%d", &new_value) != 1) {
+		pr_err("%s: no resend_igmp value specified.\n",
+		       bond->dev->name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (new_value < 0) {
+		pr_err("%s: Invalid resend_igmp value %d not in range 1-%d; rejected.\n",
+		       bond->dev->name, new_value, INT_MAX);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pr_info("%s: Setting resend_igmp to %d.\n",
+		bond->dev->name, new_value);
+	bond->params.resend_igmp = new_value;
+out:
+	return ret;
+}
+
+static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR,
+		   bonding_show_resend_igmp, bonding_store_resend_igmp);
+
 static struct attribute *per_bond_attrs[] = {
 	&dev_attr_slaves.attr,
 	&dev_attr_mode.attr,
@@ -1619,6 +1662,7 @@ static struct attribute *per_bond_attrs[] = {
 	&dev_attr_ad_partner_mac.attr,
 	&dev_attr_queue_id.attr,
 	&dev_attr_all_slaves_active.attr,
+	&dev_attr_resend_igmp.attr,
 	NULL,
 };
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 308ed10..c49bdb0 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -136,6 +136,7 @@ struct bond_params {
 	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
 	int tx_queues;
 	int all_slaves_active;
+	int resend_igmp;
 };
 
 struct bond_parm_tbl {
@@ -198,6 +199,7 @@ struct bonding {
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	rwlock_t lock;
 	rwlock_t curr_slave_lock;
+	s8       igmp_retrans;
 	s8       kill_timers;
 	s8	 send_grat_arp;
 	s8	 send_unsol_na;
diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index 2c799437..d2f78b7 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -84,6 +84,9 @@
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
 #define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
+
+#define BOND_DEFAULT_RESEND_IGMP	1 /* Default number of IGMP membership reports
+					     to resend on link failure. */
 /* hashing types */
 #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
 #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */
-- 
1.7.3


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] bonding: rejoin multicast groups on VLANs
  2010-09-30 20:45   ` [PATCH v2] " Flavio Leitner
@ 2010-10-04 13:24     ` Andy Gospodarek
  2010-10-05 22:07       ` Flavio Leitner
  0 siblings, 1 reply; 17+ messages in thread
From: Andy Gospodarek @ 2010-10-04 13:24 UTC (permalink / raw)
  To: Flavio Leitner; +Cc: netdev, Andy Gospodarek, bonding-devel, Jay Vosburgh

On Thu, Sep 30, 2010 at 05:45:24PM -0300, Flavio Leitner wrote:
> It fixes bonding to rejoin multicast groups added
> to VLAN devices on top of bonding when a failover
> happens.
> 
> Signed-off-by: Flavio Leitner <fleitner@redhat.com>
> ---
>  drivers/net/bonding/bond_main.c |   61 +++++++++++++++++++++++++++++++++-----
>  drivers/net/bonding/bonding.h   |    1 +
>  2 files changed, 54 insertions(+), 8 deletions(-)
> 
[...]
> @@ -944,7 +979,9 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
>  
>  		netdev_for_each_mc_addr(ha, bond->dev)
>  			dev_mc_add(new_active->dev, ha->addr);
> -		bond_resend_igmp_join_requests(bond);
> +
> +		/* rejoin multicast groups */
> +		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
>  	}
>  }
>  

I was hoping that one patch would just make the changes so
retransmission was supported on VLANs and the second patch would queue
the work and add the tunable for multiple retransmissions, but I guess
it wasn't clear enough.

I felt like that would divide the patches up into the bug-fix (VLANs +
multicast not working) and the new feature (multiple retransmissions
from the workqueue).

I will test these out this morning and make sure things look good.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] bonding: rejoin multicast groups on VLANs
  2010-10-04 13:24     ` Andy Gospodarek
@ 2010-10-05 22:07       ` Flavio Leitner
  2010-10-06  0:23         ` [PATCH 1/3] " Flavio Leitner
                           ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Flavio Leitner @ 2010-10-05 22:07 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: netdev, bonding-devel, Jay Vosburgh

On Mon, Oct 04, 2010 at 09:24:10AM -0400, Andy Gospodarek wrote:
> On Thu, Sep 30, 2010 at 05:45:24PM -0300, Flavio Leitner wrote:
> > It fixes bonding to rejoin multicast groups added
> > to VLAN devices on top of bonding when a failover
> > happens.
> > 
> > Signed-off-by: Flavio Leitner <fleitner@redhat.com>
> > ---
> >  drivers/net/bonding/bond_main.c |   61 +++++++++++++++++++++++++++++++++-----
> >  drivers/net/bonding/bonding.h   |    1 +
> >  2 files changed, 54 insertions(+), 8 deletions(-)
> > 
> [...]
> > @@ -944,7 +979,9 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
> >  
> >  		netdev_for_each_mc_addr(ha, bond->dev)
> >  			dev_mc_add(new_active->dev, ha->addr);
> > -		bond_resend_igmp_join_requests(bond);
> > +
> > +		/* rejoin multicast groups */
> > +		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
> >  	}
> >  }
> >  
> 
> I was hoping that one patch would just make the changes so
> retransmission was supported on VLANs and the second patch would queue
> the work and add the tunable for multiple retransmissions, but I guess
> it wasn't clear enough.
> 
> I felt like that would divide the patches up into the bug-fix (VLANs +
> multicast not working) and the new feature (multiple retransmissions
> from the workqueue).

I'm not seeing how to that because the first patch changes to send
the membership directly and not using timers anymore, so the call
trace usually is:

 write_lock_bh(&bond->curr_slave_lock); <-- hold lock
 bond_select_active_slave() 
  ->bond_change_active_slave()
     ->bond_mc_swap()
        ->tx membership reports
 

then trying to send the IGMP packet directly, it will end up at:
  bond_start_xmit()
   ->bond_xmit_activebackup() (mode=1, for example)
      ->read_lock(&bond->lock);
      ->read_lock(&bond->curr_slave_lock); <-- deadlock


> I will test these out this morning and make sure things look good.

I have a better patch sequence which I'm planning post replying to
this thread as soon as I finish up testing them. It is still using
workqueue though.

--
Flavio

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1/3] bonding: rejoin multicast groups on VLANs
  2010-10-05 22:07       ` Flavio Leitner
@ 2010-10-06  0:23         ` Flavio Leitner
  2010-10-06  3:28           ` David Miller
  2010-10-06  0:23         ` [PATCH 2/3] bonding: fix to rejoin multicast groups immediately Flavio Leitner
  2010-10-06  0:23         ` [PATCH 3/3] bonding: add retransmit membership reports tunable Flavio Leitner
  2 siblings, 1 reply; 17+ messages in thread
From: Flavio Leitner @ 2010-10-06  0:23 UTC (permalink / raw)
  To: netdev
  Cc: bonding-devel, Jay Vosburgh, Andy Gospodarek, David Miller,
	Flavio Leitner

During a failover, the IGMP membership is sent to update
the switch restoring the traffic, but it misses groups added
to VLAN devices running on top of bonding devices.

This patch changes it to iterate over all VLAN devices
on top of it sending IGMP memberships too.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 drivers/net/bonding/bond_main.c |   60 +++++++++++++++++++++++++++++++-------
 drivers/net/bonding/bonding.h   |    1 +
 2 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3b16f62..6554b47 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -865,18 +865,13 @@ static void bond_mc_del(struct bonding *bond, void *addr)
 }
 
 
-/*
- * Retrieve the list of registered multicast addresses for the bonding
- * device and retransmit an IGMP JOIN request to the current active
- * slave.
- */
-static void bond_resend_igmp_join_requests(struct bonding *bond)
+static void __bond_resend_igmp_join_requests(struct net_device *dev)
 {
 	struct in_device *in_dev;
 	struct ip_mc_list *im;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(bond->dev);
+	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev) {
 		for (im = in_dev->mc_list; im; im = im->next)
 			ip_mc_rejoin_group(im);
@@ -886,6 +881,41 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 }
 
 /*
+ * Retrieve the list of registered multicast addresses for the bonding
+ * device and retransmit an IGMP JOIN request to the current active
+ * slave.
+ */
+static void bond_resend_igmp_join_requests(struct bonding *bond)
+{
+	struct net_device *vlan_dev;
+	struct vlan_entry *vlan;
+
+	read_lock(&bond->lock);
+
+	/* rejoin all groups on bond device */
+	__bond_resend_igmp_join_requests(bond->dev);
+
+	/* rejoin all groups on vlan devices */
+	if (bond->vlgrp) {
+		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+			vlan_dev = vlan_group_get_device(bond->vlgrp,
+							 vlan->vlan_id);
+			if (vlan_dev)
+				__bond_resend_igmp_join_requests(vlan_dev);
+		}
+	}
+
+	read_unlock(&bond->lock);
+}
+
+void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
+{
+	struct bonding *bond = container_of(work, struct bonding,
+							mcast_work.work);
+	bond_resend_igmp_join_requests(bond);
+}
+
+/*
  * flush all members of flush->mc_list from device dev->mc_list
  */
 static void bond_mc_list_flush(struct net_device *bond_dev,
@@ -944,7 +974,6 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 
 		netdev_for_each_mc_addr(ha, bond->dev)
 			dev_mc_add(new_active->dev, ha->addr);
-		bond_resend_igmp_join_requests(bond);
 	}
 }
 
@@ -1180,9 +1209,11 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 		}
 	}
 
-	/* resend IGMP joins since all were sent on curr_active_slave */
-	if (bond->params.mode == BOND_MODE_ROUNDROBIN) {
-		bond_resend_igmp_join_requests(bond);
+	/* resend IGMP joins since active slave has changed or
+	 * all were sent on curr_active_slave */
+	if ((USES_PRIMARY(bond->params.mode) && new_active) ||
+	    bond->params.mode == BOND_MODE_ROUNDROBIN) {
+		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
 	}
 }
 
@@ -3744,6 +3775,8 @@ static int bond_open(struct net_device *bond_dev)
 
 	bond->kill_timers = 0;
 
+	INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
+
 	if (bond_is_lb(bond)) {
 		/* bond_alb_initialize must be called before the timer
 		 * is started.
@@ -3828,6 +3861,8 @@ static int bond_close(struct net_device *bond_dev)
 		break;
 	}
 
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->mcast_work);
 
 	if (bond_is_lb(bond)) {
 		/* Must be called only after all
@@ -4699,6 +4734,9 @@ static void bond_work_cancel_all(struct bonding *bond)
 	if (bond->params.mode == BOND_MODE_8023AD &&
 	    delayed_work_pending(&bond->ad_work))
 		cancel_delayed_work(&bond->ad_work);
+
+	if (delayed_work_pending(&bond->mcast_work))
+		cancel_delayed_work(&bond->mcast_work);
 }
 
 /*
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c6fdd85..308ed10 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -223,6 +223,7 @@ struct bonding {
 	struct   delayed_work arp_work;
 	struct   delayed_work alb_work;
 	struct   delayed_work ad_work;
+	struct   delayed_work mcast_work;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct   in6_addr master_ipv6;
 #endif
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/3] bonding: fix to rejoin multicast groups immediately
  2010-10-05 22:07       ` Flavio Leitner
  2010-10-06  0:23         ` [PATCH 1/3] " Flavio Leitner
@ 2010-10-06  0:23         ` Flavio Leitner
  2010-10-06  3:28           ` David Miller
  2010-10-06  0:23         ` [PATCH 3/3] bonding: add retransmit membership reports tunable Flavio Leitner
  2 siblings, 1 reply; 17+ messages in thread
From: Flavio Leitner @ 2010-10-06  0:23 UTC (permalink / raw)
  To: netdev
  Cc: bonding-devel, Jay Vosburgh, Andy Gospodarek, David Miller,
	Flavio Leitner

The IGMP specs states that if the system receives a
membership report, it shouldn't send another for the
next minute. However, if a link failure happens right
after that, the backup slave and the switch connected
to this slave will not know about the multicast and
the traffic will hang for about a minute.

This patch fixes it to rejoin multicast groups immediately
after a failover restoring the multicast traffic.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 net/ipv4/igmp.c |   16 ++++++++--------
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2a4bb76..25f3396 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1269,14 +1269,14 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
 	if (im->multiaddr == IGMP_ALL_HOSTS)
 		return;
 
-	if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
-		igmp_mod_timer(im, IGMP_Initial_Report_Delay);
-		return;
-	}
-	/* else, v3 */
-	im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
-		IGMP_Unsolicited_Report_Count;
-	igmp_ifc_event(in_dev);
+	/* a failover is happening and switches
+	 * must be notified immediately */
+	if (IGMP_V1_SEEN(in_dev))
+		igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
+	else if (IGMP_V2_SEEN(in_dev))
+		igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
+	else
+		igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
 #endif
 }
 EXPORT_SYMBOL(ip_mc_rejoin_group);
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 3/3] bonding: add retransmit membership reports tunable
  2010-10-05 22:07       ` Flavio Leitner
  2010-10-06  0:23         ` [PATCH 1/3] " Flavio Leitner
  2010-10-06  0:23         ` [PATCH 2/3] bonding: fix to rejoin multicast groups immediately Flavio Leitner
@ 2010-10-06  0:23         ` Flavio Leitner
  2010-10-06  3:29           ` David Miller
  2 siblings, 1 reply; 17+ messages in thread
From: Flavio Leitner @ 2010-10-06  0:23 UTC (permalink / raw)
  To: netdev
  Cc: bonding-devel, Jay Vosburgh, Andy Gospodarek, David Miller,
	Flavio Leitner

Allow sysadmins to configure the number of multicast
membership report sent on a link failure event.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
---
 Documentation/networking/bonding.txt |    8 ++++++
 drivers/net/bonding/bond_main.c      |   15 +++++++++++
 drivers/net/bonding/bond_sysfs.c     |   44 ++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bonding.h        |    2 +
 include/linux/if_bonding.h           |    3 ++
 5 files changed, 72 insertions(+), 0 deletions(-)

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index d2b62b7..5dc6387 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -765,6 +765,14 @@ xmit_hash_policy
 	does not exist, and the layer2 policy is the only policy.  The
 	layer2+3 value was added for bonding version 3.2.2.
 
+resend_igmp
+
+	Specifies the number of IGMP membership reports to be issued after
+	a failover event. One membership report is issued immediately after
+	the failover, subsequent packets are sent in each 200ms interval.
+
+	The valid range is 0 - 255; the default value is 1. This option
+	was added for bonding version 3.7.0.
 
 3. Configuring Bonding Devices
 ==============================
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6554b47..19206ba 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -109,6 +109,7 @@ static char *arp_validate;
 static char *fail_over_mac;
 static int all_slaves_active = 0;
 static struct bond_params bonding_defaults;
+static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
 
 module_param(max_bonds, int, 0);
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
@@ -163,6 +164,8 @@ module_param(all_slaves_active, int, 0);
 MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface"
 				     "by setting active flag for all slaves.  "
 				     "0 for never (default), 1 for always.");
+module_param(resend_igmp, int, 0);
+MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
 
 /*----------------------------- Global variables ----------------------------*/
 
@@ -905,6 +908,9 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 		}
 	}
 
+	if (--bond->igmp_retrans > 0)
+		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
+
 	read_unlock(&bond->lock);
 }
 
@@ -1213,6 +1219,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 	 * all were sent on curr_active_slave */
 	if ((USES_PRIMARY(bond->params.mode) && new_active) ||
 	    bond->params.mode == BOND_MODE_ROUNDROBIN) {
+		bond->igmp_retrans = bond->params.resend_igmp;
 		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
 	}
 }
@@ -4929,6 +4936,13 @@ static int bond_check_params(struct bond_params *params)
 		all_slaves_active = 0;
 	}
 
+	if (resend_igmp < 0 || resend_igmp > 255) {
+		pr_warning("Warning: resend_igmp (%d) should be between "
+			   "0 and 255, resetting to %d\n",
+			   resend_igmp, BOND_DEFAULT_RESEND_IGMP);
+		resend_igmp = BOND_DEFAULT_RESEND_IGMP;
+	}
+
 	/* reset values for TLB/ALB */
 	if ((bond_mode == BOND_MODE_TLB) ||
 	    (bond_mode == BOND_MODE_ALB)) {
@@ -5101,6 +5115,7 @@ static int bond_check_params(struct bond_params *params)
 	params->fail_over_mac = fail_over_mac_value;
 	params->tx_queues = tx_queues;
 	params->all_slaves_active = all_slaves_active;
+	params->resend_igmp = resend_igmp;
 
 	if (primary) {
 		strncpy(params->primary, primary, IFNAMSIZ);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index c311aed..01b4c3f 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1592,6 +1592,49 @@ out:
 static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
 		   bonding_show_slaves_active, bonding_store_slaves_active);
 
+/*
+ * Show and set the number of IGMP membership reports to send on link failure
+ */
+static ssize_t bonding_show_resend_igmp(struct device *d,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct bonding *bond = to_bond(d);
+
+	return sprintf(buf, "%d\n", bond->params.resend_igmp);
+}
+
+static ssize_t bonding_store_resend_igmp(struct device *d,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	int new_value, ret = count;
+	struct bonding *bond = to_bond(d);
+
+	if (sscanf(buf, "%d", &new_value) != 1) {
+		pr_err("%s: no resend_igmp value specified.\n",
+		       bond->dev->name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (new_value < 0) {
+		pr_err("%s: Invalid resend_igmp value %d not in range 0-255; rejected.\n",
+		       bond->dev->name, new_value);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pr_info("%s: Setting resend_igmp to %d.\n",
+		bond->dev->name, new_value);
+	bond->params.resend_igmp = new_value;
+out:
+	return ret;
+}
+
+static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR,
+		   bonding_show_resend_igmp, bonding_store_resend_igmp);
+
 static struct attribute *per_bond_attrs[] = {
 	&dev_attr_slaves.attr,
 	&dev_attr_mode.attr,
@@ -1619,6 +1662,7 @@ static struct attribute *per_bond_attrs[] = {
 	&dev_attr_ad_partner_mac.attr,
 	&dev_attr_queue_id.attr,
 	&dev_attr_all_slaves_active.attr,
+	&dev_attr_resend_igmp.attr,
 	NULL,
 };
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 308ed10..c15f213 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -136,6 +136,7 @@ struct bond_params {
 	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
 	int tx_queues;
 	int all_slaves_active;
+	int resend_igmp;
 };
 
 struct bond_parm_tbl {
@@ -202,6 +203,7 @@ struct bonding {
 	s8	 send_grat_arp;
 	s8	 send_unsol_na;
 	s8	 setup_by_slave;
+	s8       igmp_retrans;
 #ifdef CONFIG_PROC_FS
 	struct   proc_dir_entry *proc_entry;
 	char     proc_file_name[IFNAMSIZ];
diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index 2c79943..a17edda 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -84,6 +84,9 @@
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
 #define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
+
+#define BOND_DEFAULT_RESEND_IGMP	1 /* Default number of IGMP membership reports */
+
 /* hashing types */
 #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
 #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] bonding: rejoin multicast groups on VLANs
  2010-10-06  0:23         ` [PATCH 1/3] " Flavio Leitner
@ 2010-10-06  3:28           ` David Miller
  2010-10-06 12:12             ` Andy Gospodarek
  0 siblings, 1 reply; 17+ messages in thread
From: David Miller @ 2010-10-06  3:28 UTC (permalink / raw)
  To: fleitner; +Cc: netdev, bonding-devel, fubar, andy

From: Flavio Leitner <fleitner@redhat.com>
Date: Tue,  5 Oct 2010 21:23:57 -0300

> During a failover, the IGMP membership is sent to update
> the switch restoring the traffic, but it misses groups added
> to VLAN devices running on top of bonding devices.
> 
> This patch changes it to iterate over all VLAN devices
> on top of it sending IGMP memberships too.
> 
> Signed-off-by: Flavio Leitner <fleitner@redhat.com>

This seems to address Andy's feedback properly, and otherwise
looks good to me, so applied to net-next-2.6

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] bonding: fix to rejoin multicast groups immediately
  2010-10-06  0:23         ` [PATCH 2/3] bonding: fix to rejoin multicast groups immediately Flavio Leitner
@ 2010-10-06  3:28           ` David Miller
  0 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2010-10-06  3:28 UTC (permalink / raw)
  To: fleitner; +Cc: netdev, bonding-devel, fubar, andy

From: Flavio Leitner <fleitner@redhat.com>
Date: Tue,  5 Oct 2010 21:23:58 -0300

> The IGMP specs states that if the system receives a
> membership report, it shouldn't send another for the
> next minute. However, if a link failure happens right
> after that, the backup slave and the switch connected
> to this slave will not know about the multicast and
> the traffic will hang for about a minute.
> 
> This patch fixes it to rejoin multicast groups immediately
> after a failover restoring the multicast traffic.
> 
> Signed-off-by: Flavio Leitner <fleitner@redhat.com>

Much better commit message :-)

Applied to net-next-2.6

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/3] bonding: add retransmit membership reports tunable
  2010-10-06  0:23         ` [PATCH 3/3] bonding: add retransmit membership reports tunable Flavio Leitner
@ 2010-10-06  3:29           ` David Miller
  0 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2010-10-06  3:29 UTC (permalink / raw)
  To: fleitner; +Cc: netdev, bonding-devel, fubar, andy

From: Flavio Leitner <fleitner@redhat.com>
Date: Tue,  5 Oct 2010 21:23:59 -0300

> Allow sysadmins to configure the number of multicast
> membership report sent on a link failure event.
> 
> Signed-off-by: Flavio Leitner <fleitner@redhat.com>

Also applied to net-next-2.6, thanks.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] bonding: rejoin multicast groups on VLANs
  2010-10-06  3:28           ` David Miller
@ 2010-10-06 12:12             ` Andy Gospodarek
  0 siblings, 0 replies; 17+ messages in thread
From: Andy Gospodarek @ 2010-10-06 12:12 UTC (permalink / raw)
  To: David Miller
  Cc: fleitner@redhat.com, netdev@vger.kernel.org,
	bonding-devel@lists.sourceforge.net, fubar@us.ibm.com

On Oct 5, 2010, at 11:28 PM, David Miller <davem@davemloft.net> wrote:

> From: Flavio Leitner <fleitner@redhat.com>
> Date: Tue,  5 Oct 2010 21:23:57 -0300
>
>> During a failover, the IGMP membership is sent to update
>> the switch restoring the traffic, but it misses groups added
>> to VLAN devices running on top of bonding devices.
>>
>> This patch changes it to iterate over all VLAN devices
>> on top of it sending IGMP memberships too.
>>
>> Signed-off-by: Flavio Leitner <fleitner@redhat.com>
>
> This seems to address Andy's feedback properly, and otherwise
> looks good to me, so applied to net-next-2.6

Agreed. Thanks for making those changes, Flavio.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2010-10-06 12:12 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-09-29  7:12 [PATCH] bonding: rejoin multicast groups on VLANs Flavio Leitner
2010-09-29 13:17 ` Flavio Leitner
2010-09-30 20:45   ` [PATCH v2] " Flavio Leitner
2010-10-04 13:24     ` Andy Gospodarek
2010-10-05 22:07       ` Flavio Leitner
2010-10-06  0:23         ` [PATCH 1/3] " Flavio Leitner
2010-10-06  3:28           ` David Miller
2010-10-06 12:12             ` Andy Gospodarek
2010-10-06  0:23         ` [PATCH 2/3] bonding: fix to rejoin multicast groups immediately Flavio Leitner
2010-10-06  3:28           ` David Miller
2010-10-06  0:23         ` [PATCH 3/3] bonding: add retransmit membership reports tunable Flavio Leitner
2010-10-06  3:29           ` David Miller
2010-09-30 20:46   ` [PATCH] " Flavio Leitner
2010-09-29 18:44 ` [PATCH] bonding: rejoin multicast groups on VLANs Andy Gospodarek
2010-09-29 19:35   ` Flavio Leitner
2010-09-29 19:54     ` Andy Gospodarek
2010-09-29 20:38       ` Flavio Leitner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).