netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [SET 2][PATCH 2/8][bonding] Propagating master's settings to slaves
@ 2003-08-08 14:44 Shmulik Hen
  2003-08-08 22:01 ` jamal
  0 siblings, 1 reply; 8+ messages in thread
From: Shmulik Hen @ 2003-08-08 14:44 UTC (permalink / raw)
  To: bonding-devel, netdev

2 - Change the monitoring functions to use the new functionality.

diff -Nuarp linux-2.4.22-rc1/drivers/net/bonding/bond_main.c linux-2.4.22-rc1-devel/drivers/net/bonding/bond_main.c
--- linux-2.4.22-rc1/drivers/net/bonding/bond_main.c	Fri Aug  8 14:03:16 2003
+++ linux-2.4.22-rc1-devel/drivers/net/bonding/bond_main.c	Fri Aug  8 14:03:17 2003
@@ -2207,8 +2207,9 @@ out:
 static void bond_mii_monitor(struct net_device *master)
 {
 	bonding_t *bond = (struct bonding *) master->priv;
-	slave_t *slave, *bestslave, *oldcurrent;
+	slave_t *slave, *oldcurrent;
 	int slave_died = 0;
+	int do_failover = 0;
 
 	read_lock(&bond->lock);
 
@@ -2218,7 +2219,6 @@ static void bond_mii_monitor(struct net_
 	 * program could monitor the link itself if needed.
 	 */
 
-	bestslave = NULL;
 	slave = (slave_t *)bond;
 
 	read_lock(&bond->ptrlock);
@@ -2226,8 +2226,6 @@ static void bond_mii_monitor(struct net_
 	read_unlock(&bond->ptrlock);
 
 	while ((slave = slave->prev) != (slave_t *)bond) {
-		/* use updelay+1 to match an UP slave even when updelay is 0 */
-		int mindelay = updelay + 1;
 		struct net_device *dev = slave->dev;
 		int link_state;
 		u16 old_speed = slave->speed;
@@ -2238,14 +2236,7 @@ static void bond_mii_monitor(struct net_
 		switch (slave->link) {
 		case BOND_LINK_UP:	/* the link was up */
 			if (link_state == BMSR_LSTATUS) {
-				/* link stays up, tell that this one
-				   is immediately available */
-				if (IS_UP(dev) && (mindelay > -2)) {
-					/* -2 is the best case :
-					   this slave was already up */
-					mindelay = -2;
-					bestslave = slave;
-				}
+				/* link stays up, nothing more to do */
 				break;
 			}
 			else { /* link going down */
@@ -2285,6 +2276,7 @@ static void bond_mii_monitor(struct net_
 					    (bond_mode == BOND_MODE_8023AD)) {
 						bond_set_slave_inactive_flags(slave);
 					}
+
 					printk(KERN_INFO
 						"%s: link status definitely down "
 						"for interface %s, disabling it",
@@ -2301,12 +2293,10 @@ static void bond_mii_monitor(struct net_
 						bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN);
 					}
 
-					write_lock(&bond->ptrlock);
-					if (slave == bond->current_slave) {
-						/* find a new interface and be verbose */
-						reselect_active_interface(bond);
+					if (slave == oldcurrent) {
+						do_failover = 1;
 					}
-					write_unlock(&bond->ptrlock);
+
 					slave_died = 1;
 				} else {
 					slave->delay--;
@@ -2321,13 +2311,6 @@ static void bond_mii_monitor(struct net_
 					master->name,
 					(downdelay - slave->delay) * miimon,
 					dev->name);
-
-				if (IS_UP(dev) && (mindelay > -1)) {
-					/* -1 is a good case : this slave went
-					   down only for a short time */
-					mindelay = -1;
-					bestslave = slave;
-				}
 			}
 			break;
 		case BOND_LINK_DOWN:	/* the link was down */
@@ -2397,26 +2380,12 @@ static void bond_mii_monitor(struct net_
 						bond_alb_handle_link_change(bond, slave, BOND_LINK_UP);
 					}
 
-					write_lock(&bond->ptrlock);
-					if ( (bond->primary_slave != NULL)
-					  && (slave == bond->primary_slave) )
-						reselect_active_interface(bond); 
-					write_unlock(&bond->ptrlock);
-				}
-				else
+					if ((oldcurrent == NULL) ||
+					    (slave == bond->primary_slave)) {
+						do_failover = 1;
+					}
+				} else {
 					slave->delay--;
-
-				/* we'll also look for the mostly eligible slave */
-				if (bond->primary_slave == NULL)  {
-				    if (IS_UP(dev) && (slave->delay < mindelay)) {
-					mindelay = slave->delay;
-					bestslave = slave;
-				    } 
-				} else if ( (IS_UP(bond->primary_slave->dev))  || 
-				          ( (!IS_UP(bond->primary_slave->dev))  && 
-				          (IS_UP(dev) && (slave->delay < mindelay)) ) ) {
-					mindelay = slave->delay;
-					bestslave = slave;
 				}
 			}
 			break;
@@ -2435,26 +2404,17 @@ static void bond_mii_monitor(struct net_
 
 	} /* end of while */
 
-	/* 
-	 * if there's no active interface and we discovered that one
-	 * of the slaves could be activated earlier, so we do it.
-	 */
-	read_lock(&bond->ptrlock);
-	oldcurrent = bond->current_slave;
-	read_unlock(&bond->ptrlock);
+	if (do_failover) {
+		write_lock(&bond->ptrlock);
 
-	/* no active interface at the moment or need to bring up the primary */
-	if (oldcurrent == NULL)  { /* no active interface at the moment */
-		if (bestslave != NULL) { /* last chance to find one ? */
-			write_lock(&bond->ptrlock);
-			change_active_interface(bond, bestslave);
-			write_unlock(&bond->ptrlock);
-		} else if (slave_died) {
-			/* print this message only once a slave has just died */
+		reselect_active_interface(bond);
+		if (oldcurrent && !bond->current_slave) {
 			printk(KERN_INFO
 				"%s: now running without any active interface !\n",
 				master->name);
 		}
+
+		write_unlock(&bond->ptrlock);
 	}
 
 	read_unlock(&bond->lock);
@@ -2472,9 +2432,10 @@ static void bond_mii_monitor(struct net_
 static void loadbalance_arp_monitor(struct net_device *master)
 {
 	bonding_t *bond;
-	slave_t *slave;
+	slave_t *slave, *oldcurrent;
 	int the_delta_in_ticks =  arp_interval * HZ / 1000;
 	int next_timer = jiffies + (arp_interval * HZ / 1000);
+	int do_failover = 0;
 
 	bond = (struct bonding *) master->priv; 
 	if (master->priv == NULL) {
@@ -2498,6 +2459,10 @@ static void loadbalance_arp_monitor(stru
 
 	read_lock(&bond->lock);
 
+	read_lock(&bond->ptrlock);
+	oldcurrent = bond->current_slave;
+	read_unlock(&bond->ptrlock);
+
 	/* see if any of the previous devices are up now (i.e. they have
 	 * xmt and rcv traffic). the current_slave does not come into
 	 * the picture unless it is null. also, slave->jiffies is not needed
@@ -2524,21 +2489,19 @@ static void loadbalance_arp_monitor(stru
 				 * current_slave being null after enslaving
 				 * is closed.
 				 */
-				write_lock(&bond->ptrlock);
-				if (bond->current_slave == NULL) {
+				if (oldcurrent == NULL) {
 					printk(KERN_INFO
 						"%s: link status definitely up "
 						"for interface %s, ",
 						master->name,
 						slave->dev->name);
-					reselect_active_interface(bond); 
+					do_failover = 1;
 				} else {
 					printk(KERN_INFO
 						"%s: interface %s is now up\n",
 						master->name,
 						slave->dev->name);
 				}
-				write_unlock(&bond->ptrlock);
 			} 
 		} else {
 			/* slave->link == BOND_LINK_UP */
@@ -2561,11 +2524,9 @@ static void loadbalance_arp_monitor(stru
 				       master->name,
 				       slave->dev->name);
 
-				write_lock(&bond->ptrlock);
-				if (slave == bond->current_slave) {
-					reselect_active_interface(bond);
+				if (slave == oldcurrent) {
+					do_failover = 1;
 				}
-				write_unlock(&bond->ptrlock);
 			}
 		} 
 
@@ -2579,6 +2540,19 @@ static void loadbalance_arp_monitor(stru
 		if (IS_UP(slave->dev)) {
 			arp_send_all(slave);
 		}
+	}
+
+	if (do_failover) {
+		write_lock(&bond->ptrlock);
+
+		reselect_active_interface(bond);
+		if (oldcurrent && !bond->current_slave) {
+			printk(KERN_INFO
+				"%s: now running without any active interface !\n",
+				master->name);
+		}
+
+		write_unlock(&bond->ptrlock);
 	}
 
 	read_unlock(&bond->lock);

-- 
| Shmulik Hen   Advanced Network Services  |
| Israel Design Center, Jerusalem          |
| LAN Access Division, Platform Networking |
| Intel Communications Group, Intel corp.  |

^ permalink raw reply	[flat|nested] 8+ messages in thread
* RE: [SET 2][PATCH 2/8][bonding] Propagating master's settings to slaves
@ 2003-08-09 10:29 Hen, Shmulik
  2003-08-11  2:51 ` jamal
  0 siblings, 1 reply; 8+ messages in thread
From: Hen, Shmulik @ 2003-08-09 10:29 UTC (permalink / raw)
  To: hadi; +Cc: bonding-devel, netdev

> -----Original Message-----
> From: jamal [mailto:hadi@cyberus.ca]
> Sent: Saturday, August 09, 2003 1:01 AM
> To: Hen, Shmulik
> Cc: bonding-devel@lists.sourceforge.net; netdev@oss.sgi.com
> Subject: Re: [SET 2][PATCH 2/8][bonding] Propagating master's settings
> to slaves
> 
> Shmulik,
> 
> Some of this bonding stuff is pretty scary. Lotsa policies in the 
> kernel and communication seems to be centred around /proc.
> Shouldnt policies on failover be really driven from user space?
> Also shouldnt communication be using something like netlink?
> 
> cheers,
> jamal
> 
> On Fri, 2003-08-08 at 10:44, Shmulik Hen wrote:
> > 2 - Change monitoring function use the new functionality.
> > 
> 

Not sure I fully understood the concerns above, but I'll try
to explain what the change was all about.

By monitoring, I meant the 3 timer functions running in bonding
to monitor link changes and act once a link failure/recovery is
detected. The old code used to do all the activity related to
changing the current active slave separately in each timer
function and it seemed redundant since it was basically the
same thing repeated 3 times. Instead, we thought it would be
best if we put that into 3 new functions - reselect_active,
find_best_slave and change_active - that do all the actual stuff
of swapping an old current with the new one.

The change we did in /proc was to reduce the amount of data
extracted each time the proc entry is polled. Instead of dumping
all the data of all the bond devices that exist, each bond returns
just data that is relevant to itself.

In the long term, the drive is to move any *smart* code done in
the config application into the driver itself and be left with
the smallest, most compact application possible. This is the
trend we've seen in the VLAN config app, and the bridge module.
All the "brain" is in the kernel module and very little should be
done in the application.


	Shmulik.

^ permalink raw reply	[flat|nested] 8+ messages in thread
* Re: [Bonding-devel] Re: [SET 2][PATCH 2/8][bonding] Propagating master's settings toslaves
@ 2003-08-11 21:41 Jay Vosburgh
  2003-08-11 23:15 ` [SET 2][PATCH 2/8][bonding] Propagating master's settings to slaves Shmulik Hen
  0 siblings, 1 reply; 8+ messages in thread
From: Jay Vosburgh @ 2003-08-11 21:41 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: shmulik.hen, hadi, Laurent DENIEL, bonding-devel, netdev

>The answer is, like life, it's a balance.
[...]
>This is why I push for a "bonding-utils" package from Jay.... because of 
>the general rule above:  put it into userspace, where possible.

	Hmm.  My impression from our prior discussions was that your
interest in moving ifenslave out of the kernel source and into its own
package was more of a source code management concern rather than
moving functionality from the kernel into user space (because
ifenslave is in user space to begin with).

	Anyway, for most of the core bonding failover logic, I don't
see how a user space daemon implementation can perform equivalently to
a kernel-only implementation.  I could be wrong (I haven't done any
testing) but for the core "eth0 is dead, enable eth1" type stuff, it
seems to me that in-kernel beats "user space yakking with kernel" for
reliability and speed, particularly on heavily loaded systems.

	Now, that said, I can see a use for a user space monitoring /
control program, for the "strategic" problems (as opposed to the
"tactical" problems, like the previous paragraph).  If we want to,
e.g., monitor bandwidth usage and add or remove links from the
aggregation, that is (a) not as time critical, and (b) somewhat
fuzzier in definition.  Such a user space program could also interface
with various system management or HA thingies and report status for
its activities as well as the activities that bonding performs
independent of it.

	One thought I've had (which dovetails somewhat with an earlier
comment from Laurent) is a tcpdump/bpf-style "policy engine" blob in
the kernel, which is programmed from user space with enough brains to
handle the "tactical" level problems (the "strategic" problems might
be more than such a blob could handle, and if it's easy enough to yak
with user space for those problems, it may not be necessary).  I
haven't done much more than think about this, though; it may very well
be overkill for the basic stuff.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2003-08-12  2:36 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2003-08-08 14:44 [SET 2][PATCH 2/8][bonding] Propagating master's settings to slaves Shmulik Hen
2003-08-08 22:01 ` jamal
  -- strict thread matches above, loose matches on Subject: below --
2003-08-09 10:29 Hen, Shmulik
2003-08-11  2:51 ` jamal
2003-08-11 10:08   ` Shmulik Hen
2003-08-11 13:47     ` jamal
2003-08-11 21:41 [Bonding-devel] Re: [SET 2][PATCH 2/8][bonding] Propagating master's settings toslaves Jay Vosburgh
2003-08-11 23:15 ` [SET 2][PATCH 2/8][bonding] Propagating master's settings to slaves Shmulik Hen
2003-08-12  2:36   ` jamal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).