netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] wrr (weighted round-robin) bonding
@ 2006-10-16 18:21 Dawid Ciezarkiewicz
  2006-10-16 18:27 ` Dawid Ciezarkiewicz
  0 siblings, 1 reply; 12+ messages in thread
From: Dawid Ciezarkiewicz @ 2006-10-16 18:21 UTC (permalink / raw)
  To: netdev

This patch is little thinner then the previous one.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-16 18:21 [RFC] wrr (weighted round-robin) bonding Dawid Ciezarkiewicz
@ 2006-10-16 18:27 ` Dawid Ciezarkiewicz
  2006-10-16 18:50   ` Jay Vosburgh
  0 siblings, 1 reply; 12+ messages in thread
From: Dawid Ciezarkiewicz @ 2006-10-16 18:27 UTC (permalink / raw)
  To: netdev

On Monday, 16 October 2006 20:21, Dawid Ciezarkiewicz wrote:
> This patch is little thinner then the previous one.

I'm sorry for that. I've just ... nevermind. Here goes the patch.

Should I post patch for ifenslave here, too?



diff -Nur linux-2.6.17.orig/Documentation/networking/bonding.txt linux-2.6.17/Documentation/networking/bonding.txt
--- linux-2.6.17.orig/Documentation/networking/bonding.txt	2006-06-18 03:49:35.000000000 +0200
+++ linux-2.6.17/Documentation/networking/bonding.txt	2006-07-28 15:47:55.000000000 +0200
@@ -398,6 +398,19 @@
 		swapped with the new curr_active_slave that was
 		chosen.
 
+	weighted-rr or 7
+
+		Weighted round-robin bonding. In this mode bonding
+		interface will use weights assigned to it's slaves.
+
+		Each slave can have weight assigned via ioctl (ifenslave).
+		These values will be used at the start of each "cycle".
+		Each slave will have token counter restored to it's weight.
+		Then using round-robin mechanism those tokens are "used"
+		to pay for emitted frames. When all token counters are
+		zeroed - new "cycle" begins.
+		
+
 primary
 
 	A string (eth0, eth2, etc) specifying which slave is the
diff -Nur linux-2.6.17.orig/drivers/net/bonding/bond_main.c linux-2.6.17/drivers/net/bonding/bond_main.c
--- linux-2.6.17.orig/drivers/net/bonding/bond_main.c	2006-06-18 03:49:35.000000000 +0200
+++ linux-2.6.17/drivers/net/bonding/bond_main.c	2006-07-28 15:31:44.000000000 +0200
@@ -115,7 +115,7 @@
 MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, "
 		       "1 for active-backup, 2 for balance-xor, "
 		       "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
-		       "6 for balance-alb");
+		       "6 for balance-alb, 7 for weighted-rr");
 module_param(primary, charp, 0);
 MODULE_PARM_DESC(primary, "Primary network device to use");
 module_param(lacp_rate, charp, 0);
@@ -162,6 +162,7 @@
 {	"802.3ad",		BOND_MODE_8023AD},
 {	"balance-tlb",		BOND_MODE_TLB},
 {	"balance-alb",		BOND_MODE_ALB},
+{	"weighted-rr",		BOND_MODE_WEIGHTED_RR},
 {	NULL,			-1},
 };
 
@@ -194,6 +195,8 @@
 		return "transmit load balancing";
 	case BOND_MODE_ALB:
 		return "adaptive load balancing";
+	case BOND_MODE_WEIGHTED_RR:
+		return "weighted round robin (weighted-rr)";
 	default:
 		return "unknown";
 	}
@@ -1198,6 +1201,24 @@
 	return 0;
 }
 
+int bond_set_weight(struct net_device *bond_dev, struct net_device *slave_dev,
+		u16 weight)
+{
+	struct slave* slave;
+	slave = bond_get_slave_by_dev(bond_dev->priv, slave_dev);
+	if (!slave) {
+		return -EINVAL;
+	}
+
+	slave->weight = weight;
+
+	if (weight) {
+		slave->link = BOND_LINK_UP;
+		slave->state = BOND_STATE_ACTIVE;
+	}
+	return 0;
+}
+
 #define BOND_INTERSECT_FEATURES \
 	(NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM|\
 	NETIF_F_TSO|NETIF_F_UFO)
@@ -1336,6 +1352,9 @@
 	 */
 	new_slave->original_flags = slave_dev->flags;
 
+	/* slave default weight = 1 */
+	new_slave->weight = 1;
+
 	/*
 	 * Save slave's original ("permanent") mac address for modes
 	 * that need it, and for restoring it upon release, and then
@@ -3601,7 +3620,10 @@
 	}
 
 	down_write(&(bonding_rwsem));
-	slave_dev = dev_get_by_name(ifr->ifr_slave);
+	if (cmd != SIOCBONDSETWEIGHT)
+		slave_dev = dev_get_by_name(ifr->ifr_slave);
+	else
+		slave_dev = dev_get_by_name(ifr->ifr_weight_slave);
 
 	dprintk("slave_dev=%p: \n", slave_dev);
 
@@ -3626,6 +3648,9 @@
 		case SIOCBONDCHANGEACTIVE:
 			res = bond_ioctl_change_active(bond_dev, slave_dev);
 			break;
+		case SIOCBONDSETWEIGHT:
+			res = bond_set_weight(bond_dev, slave_dev, ifr->ifr_weight_weight);
+			break;
 		default:
 			res = -EOPNOTSUPP;
 		}
@@ -3881,6 +3906,67 @@
 	return 0;
 }
 
+static int bond_xmit_weighted_rr(struct sk_buff *skb, struct net_device *bond_dev)
+{
+	struct bonding *bond = bond_dev->priv;
+	struct slave *slave, *start_at;
+	int i;
+	int res = 1;
+	int were_weight_tokens_recharged = 0;
+
+	read_lock(&bond->lock);
+
+	if (!BOND_IS_OK(bond)) {
+		goto out;
+	}
+
+	read_lock(&bond->curr_slave_lock);
+	slave = start_at = bond->curr_active_slave;
+	read_unlock(&bond->curr_slave_lock);
+
+	if (!slave) {
+		goto out;
+	}
+
+try_send:
+	bond_for_each_slave_from(bond, slave, i, start_at) {
+		if (IS_UP(slave->dev) &&
+		    (slave->weight_tokens > 0) &&
+			(slave->link == BOND_LINK_UP) &&
+		    (slave->state == BOND_STATE_ACTIVE)) {
+			
+			res = bond_dev_queue_xmit(bond, skb, slave->dev);
+			(slave->weight_tokens)--;
+			write_lock(&bond->curr_slave_lock);
+			bond->curr_active_slave = slave->next;
+			write_unlock(&bond->curr_slave_lock);
+
+			goto out;
+		}
+	}
+	
+	if (were_weight_tokens_recharged == 0) {
+		read_lock(&bond->curr_slave_lock);
+		slave = start_at = bond->curr_active_slave;
+		read_unlock(&bond->curr_slave_lock);
+
+		bond_for_each_slave_from(bond, slave, i, start_at) {
+			slave->weight_tokens = slave->weight;
+		}
+
+		were_weight_tokens_recharged = 1;
+		goto try_send;
+	}
+
+out:
+	if (res) {
+		/* no suitable interface, frame not sent */
+		dev_kfree_skb(skb);
+	}
+	read_unlock(&bond->lock);
+	return 0;
+}
+
 static void bond_activebackup_xmit_copy(struct sk_buff *skb,
                                         struct bonding *bond,
                                         struct slave *slave)
@@ -4088,6 +4174,9 @@
 	case BOND_MODE_ROUNDROBIN:
 		bond_dev->hard_start_xmit = bond_xmit_roundrobin;
 		break;
+	case BOND_MODE_WEIGHTED_RR:
+		bond_dev->hard_start_xmit = bond_xmit_weighted_rr;
+		break;
 	case BOND_MODE_ACTIVEBACKUP:
 		bond_dev->hard_start_xmit = bond_xmit_activebackup;
 		break;
diff -Nur linux-2.6.17.orig/drivers/net/bonding/bond_sysfs.c linux-2.6.17/drivers/net/bonding/bond_sysfs.c
--- linux-2.6.17.orig/drivers/net/bonding/bond_sysfs.c	2006-06-18 03:49:35.000000000 +0200
+++ linux-2.6.17/drivers/net/bonding/bond_sysfs.c	2006-07-28 15:25:11.000000000 +0200
@@ -241,10 +241,14 @@
 			res += sprintf(buf + res, "++more++");
 			break;
 		}
-		res += sprintf(buf + res, "%s ", slave->dev->name);
+		if (bond->params.mode == BOND_MODE_WEIGHTED_RR) {
+			res += sprintf(buf + res, "%s %d\n", slave->dev->name,
+					slave->weight);
+		} else {
+			res += sprintf(buf + res, "%s\n", slave->dev->name);
+		}
 	}
 	read_unlock_bh(&bond->lock);
-	res += sprintf(buf + res, "\n");
 	res++;
 	return res;
 }
diff -Nur linux-2.6.17.orig/drivers/net/bonding/bonding.h linux-2.6.17/drivers/net/bonding/bonding.h
--- linux-2.6.17.orig/drivers/net/bonding/bonding.h	2006-06-18 03:49:35.000000000 +0200
+++ linux-2.6.17/drivers/net/bonding/bonding.h	2006-07-28 15:25:11.000000000 +0200
@@ -150,6 +150,8 @@
 	struct slave *next;
 	struct slave *prev;
 	s16    delay;
+	u16    weight_tokens;
+	u16    weight;
 	u32    jiffies;
 	s8     link;    /* one of BOND_LINK_XXXX */
 	s8     state;   /* one of BOND_STATE_XXXX */
diff -Nur linux-2.6.17.orig/include/linux/if.h linux-2.6.17/include/linux/if.h
--- linux-2.6.17.orig/include/linux/if.h	2006-06-18 03:49:35.000000000 +0200
+++ linux-2.6.17/include/linux/if.h	2006-07-28 15:25:11.000000000 +0200
@@ -172,6 +172,11 @@
 		char	ifru_newname[IFNAMSIZ];
 		void __user *	ifru_data;
 		struct	if_settings ifru_settings;
+		struct {
+			__u16 weight;
+			char slave[IFNAMSIZ];	
+		} ifru_weight;
+
 	} ifr_ifru;
 };
 
@@ -180,6 +185,8 @@
 #define	ifr_addr	ifr_ifru.ifru_addr	/* address		*/
 #define	ifr_dstaddr	ifr_ifru.ifru_dstaddr	/* other end of p-p lnk	*/
 #define	ifr_broadaddr	ifr_ifru.ifru_broadaddr	/* broadcast address	*/
+#define	ifr_weight_weight ifr_ifru.ifru_weight.weight /* bonding weight*/
+#define	ifr_weight_slave  ifr_ifru.ifru_weight.slave  /* bonding weight slave */
 #define	ifr_netmask	ifr_ifru.ifru_netmask	/* interface net mask	*/
 #define	ifr_flags	ifr_ifru.ifru_flags	/* flags		*/
 #define	ifr_metric	ifr_ifru.ifru_ivalue	/* metric		*/
diff -Nur linux-2.6.17.orig/include/linux/if_bonding.h linux-2.6.17/include/linux/if_bonding.h
--- linux-2.6.17.orig/include/linux/if_bonding.h	2006-06-18 03:49:35.000000000 +0200
+++ linux-2.6.17/include/linux/if_bonding.h	2006-07-28 15:25:11.000000000 +0200
@@ -70,6 +70,7 @@
 #define BOND_MODE_8023AD        4
 #define BOND_MODE_TLB           5
 #define BOND_MODE_ALB		6 /* TLB + RLB (receive load balancing) */
+#define BOND_MODE_WEIGHTED_RR   7
 
 /* each slave's link has 4 states */
 #define BOND_LINK_UP    0           /* link is up and running */
diff -Nur linux-2.6.17.orig/include/linux/sockios.h linux-2.6.17/include/linux/sockios.h
--- linux-2.6.17.orig/include/linux/sockios.h	2006-06-18 03:49:35.000000000 +0200
+++ linux-2.6.17/include/linux/sockios.h	2006-07-28 15:25:11.000000000 +0200
@@ -115,6 +115,7 @@
 #define SIOCBONDSLAVEINFOQUERY 0x8993   /* rtn info about slave state   */
 #define SIOCBONDINFOQUERY      0x8994	/* rtn info about bond state    */
 #define SIOCBONDCHANGEACTIVE   0x8995   /* update to a new active slave */
+#define SIOCBONDSETWEIGHT      0x899e   /* update weight */
 			
 /* bridge calls */
 #define SIOCBRADDBR     0x89a0		/* create new bridge device     */
diff -Nur linux-2.6.17.orig/net/core/dev.c linux-2.6.17/net/core/dev.c
--- linux-2.6.17.orig/net/core/dev.c	2006-06-18 03:49:35.000000000 +0200
+++ linux-2.6.17/net/core/dev.c	2006-07-28 15:25:11.000000000 +0200
@@ -2495,6 +2495,7 @@
 			    cmd <= SIOCDEVPRIVATE + 15) ||
 			    cmd == SIOCBONDENSLAVE ||
 			    cmd == SIOCBONDRELEASE ||
+			    cmd == SIOCBONDSETWEIGHT ||
 			    cmd == SIOCBONDSETHWADDR ||
 			    cmd == SIOCBONDSLAVEINFOQUERY ||
 			    cmd == SIOCBONDINFOQUERY ||
@@ -2656,6 +2657,7 @@
 		case SIOCBONDRELEASE:
 		case SIOCBONDSETHWADDR:
 		case SIOCBONDCHANGEACTIVE:
+		case SIOCBONDSETWEIGHT:
 		case SIOCBRADDIF:
 		case SIOCBRDELIF:
 			if (!capable(CAP_NET_ADMIN))


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-16 18:27 ` Dawid Ciezarkiewicz
@ 2006-10-16 18:50   ` Jay Vosburgh
  2006-10-16 19:07     ` Dawid Ciezarkiewicz
  0 siblings, 1 reply; 12+ messages in thread
From: Jay Vosburgh @ 2006-10-16 18:50 UTC (permalink / raw)
  To: Dawid Ciezarkiewicz; +Cc: netdev


Dawid Ciezarkiewicz <dpc@asn.pl> wrote:
[...]
>+	weighted-rr or 7
>+
>+		Weighted round-robin bonding. In this mode bonding
>+		interface will use weights assigned to it's slaves.
>+
>+		Each slave can have weight assigned via ioctl (ifenslave).
>+		These values will be used at the start of each "cycle".
>+		Each slave will have token counter restored to it's weight.
>+		Then using round-robin mechanism those tokens are "used"
>+		to pay for emitted frames. When all token counters are
>+		zeroed - new "cycle" begins.

	Before getting into the technical bits of the patch, what's the
reason for wanting to do this, and why is this rather complex manual
weight assignment better than an automatic system based on, e.g., link
speed of the slaves?

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-16 18:50   ` Jay Vosburgh
@ 2006-10-16 19:07     ` Dawid Ciezarkiewicz
  2006-10-16 21:30       ` Andy Gospodarek
  0 siblings, 1 reply; 12+ messages in thread
From: Dawid Ciezarkiewicz @ 2006-10-16 19:07 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: netdev

On Monday, 16 October 2006 20:50, you wrote:
> 
> Dawid Ciezarkiewicz <dpc@asn.pl> wrote:
> [...]
> >+	weighted-rr or 7
> >+
> >+		Weighted round-robin bonding. In this mode bonding
> >+		interface will use weights assigned to it's slaves.
> >+
> >+		Each slave can have weight assigned via ioctl (ifenslave).
> >+		These values will be used at the start of each "cycle".
> >+		Each slave will have token counter restored to it's weight.
> >+		Then using round-robin mechanism those tokens are "used"
> >+		to pay for emitted frames. When all token counters are
> >+		zeroed - new "cycle" begins.
> 
> 	Before getting into the technical bits of the patch, what's the
> reason for wanting to do this, and why is this rather complex manual
> weight assignment better than an automatic system based on, e.g., link
> speed of the slaves?

In short:
It was designed as a solution for wireless links bonding - where link quality 
can change rather quickly in time. By using wrr bonding, userspace tools can 
measure current bandwidth and change bonding slave weights in realtime.

It was written for Lintrack, and you can read about it's usage here:
http://lintrack.org/index.php/about/advantage

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-16 19:07     ` Dawid Ciezarkiewicz
@ 2006-10-16 21:30       ` Andy Gospodarek
  2006-10-17  8:16         ` Dawid Ciezarkiewicz
  0 siblings, 1 reply; 12+ messages in thread
From: Andy Gospodarek @ 2006-10-16 21:30 UTC (permalink / raw)
  To: Dawid Ciezarkiewicz; +Cc: Jay Vosburgh, netdev

On Mon, Oct 16, 2006 at 09:07:57PM +0200, Dawid Ciezarkiewicz wrote:
> > 
> > 	Before getting into the technical bits of the patch, what's the
> > reason for wanting to do this, and why is this rather complex manual
> > weight assignment better than an automatic system based on, e.g., link
> > speed of the slaves?
> 
> In short:
> It was designed as a solution for wireless links bonding - where link quality 
> can change rather quickly in time. By using wrr bonding, userspace tools can 
> measure current bandwidth and change bonding slave weights in realtime.

Since this is so similar to mode 0, it would seem there would be a way
to extend it rather than creating yet another mode that is so similar.
What would be the reason not to enhance that mode?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-16 21:30       ` Andy Gospodarek
@ 2006-10-17  8:16         ` Dawid Ciezarkiewicz
  2006-10-19 19:04           ` Andy Gospodarek
  0 siblings, 1 reply; 12+ messages in thread
From: Dawid Ciezarkiewicz @ 2006-10-17  8:16 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: Jay Vosburgh, netdev

On Monday, 16 October 2006 23:30, Andy Gospodarek wrote:
> On Mon, Oct 16, 2006 at 09:07:57PM +0200, Dawid Ciezarkiewicz wrote:
> > > 
> > > 	Before getting into the technical bits of the patch, what's the
> > > reason for wanting to do this, and why is this rather complex manual
> > > weight assignment better than an automatic system based on, e.g., link
> > > speed of the slaves?
> > 
> > In short:
> > It was designed as a solution for wireless links bonding - where link 
quality 
> > can change rather quickly in time. By using wrr bonding, userspace tools 
can 
> > measure current bandwidth and change bonding slave weights in realtime.
> 
> Since this is so similar to mode 0, it would seem there would be a way
> to extend it rather than creating yet another mode that is so similar.
> What would be the reason not to enhance that mode?

In fact - as default weight is being set to 1, without changing it wrr bonding 
mode works like plain round-robin one. But it have little more overhead 
(recharging tokens), and code is a bit more complicated. I was not sure if 
some tools could assume that in mode 0 all interfaces work with same weights 
and because of that behave strange with this patch in use. 

It was written as a solution for some problem, and I'm still not sure if such 
change will always be patch to linux kernel or may some day go into mainline. 
For compatibility I've decided to have those modes separated.

Because of that I haven't replaced mode 0. If this patch will be considered 
useful, and my concerns are not a problem - I'd like to replace 0 mode if 
possible.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-17  8:16         ` Dawid Ciezarkiewicz
@ 2006-10-19 19:04           ` Andy Gospodarek
  2006-10-20 19:41             ` Dawid Ciezarkiewicz
  0 siblings, 1 reply; 12+ messages in thread
From: Andy Gospodarek @ 2006-10-19 19:04 UTC (permalink / raw)
  To: Dawid Ciezarkiewicz; +Cc: Andy Gospodarek, Jay Vosburgh, netdev

On Tue, Oct 17, 2006 at 10:16:21AM +0200, Dawid Ciezarkiewicz wrote:
> 
> In fact - as default weight is being set to 1, without changing it wrr bonding 
> mode works like plain round-robin one. But it have little more overhead 
> (recharging tokens), and code is a bit more complicated. I was not sure if 
> some tools could assume that in mode 0 all interfaces work with same weights 
> and because of that behave strange with this patch in use. 
> 
> It was written as a solution for some problem, and I'm still not sure if such 
> change will always be patch to linux kernel or may some day go into mainline. 
> For compatibility I've decided to have those modes separated.
> 
> Because of that I haven't replaced mode 0. If this patch will be considered 
> useful, and my concerns are not a problem - I'd like to replace 0 mode if 
> possible.
> -

It would seem to me that extending an existing mode would be more
desirable than adding yet another mode to worry about.  I don't even
like the fact that there are as many as there are, but I understand why
they are there.  

I recently extended rr mode to allow an additional parameter called that
rr_repeat that would allow someone to send more than a single frame out
of each device before moving to the next one.  It seemed this could be
helpful when dealing with switches that constantly re-learned source MAC
addresses.  Network performance would suffer whenever rr_repeat was >1,
but box performance might be better if there weren't so many locks
taken.

This patch is pretty bad (in-fact even the math could be done better to
avoid the expensive modulo), but since I did did it as a proof of
concept I wasn't too worried about it at the time.  The functionality
might be interesting to add to your weighted rr concept.  It's also
against an older kernel, but it should apply to an upstream one with
minimal, if any, porting.


--- linux/drivers/net/bonding/bond_main.c.orig	2006-10-11 10:41:07.611562000 -0400
+++ linux/drivers/net/bonding/bond_main.c	2006-10-11 13:40:54.767425000 -0400
@@ -543,6 +543,7 @@
 /* monitor all links that often (in milliseconds). <=0 disables monitoring */
 #define BOND_LINK_MON_INTERV	0
 #define BOND_LINK_ARP_INTERV	0
+#define BOND_RR_REPEAT		1
 
 static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
 static int miimon	= BOND_LINK_MON_INTERV;
@@ -555,6 +556,8 @@ static char *lacp_rate	= NULL;
 static char *xmit_hash_policy = NULL;
 static int arp_interval = BOND_LINK_ARP_INTERV;
 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
+static int rr_repeat   = BOND_RR_REPEAT;
+static int rr_repeat_count = 0;
 
 MODULE_PARM(max_bonds, "i");
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
@@ -578,6 +581,8 @@ MODULE_PARM(arp_interval, "i");
 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
 MODULE_PARM(arp_ip_target, "1-" __MODULE_STRING(BOND_MAX_ARP_TARGETS) "s");
 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
+MODULE_PARM(rr_repeat, "i");
+MODULE_PARM_DESC(rr_repeat, "number of frames to send on round-robin bonds before switching interfaces");
 
 /*----------------------------- Global variables ----------------------------*/
 
@@ -4390,21 +4395,27 @@ static int bond_xmit_roundrobin(struct s
 		goto out;
 	}
 
-	bond_for_each_slave_from(bond, slave, i, start_at) {
-		if (IS_UP(slave->dev) &&
-		    (slave->link == BOND_LINK_UP) &&
-		    (slave->state == BOND_STATE_ACTIVE)) {
-			res = bond_dev_queue_xmit(bond, skb, slave->dev);
+	/* just xmit if we haven't hit the repeat val */
+	if (!(++rr_repeat_count % rr_repeat)) {
 
-			write_lock(&bond->curr_slave_lock);
-			bond->curr_active_slave = slave->next;
-			write_unlock(&bond->curr_slave_lock);
+		rr_repeat_count = 0;
+		bond_for_each_slave_from(bond, slave, i, start_at) {
+			if (IS_UP(slave->dev) &&
+			    (slave->link == BOND_LINK_UP) &&
+			    (slave->state == BOND_STATE_ACTIVE)) {
+				res = bond_dev_queue_xmit(bond, skb, slave->dev);
 
-			break;
+				write_lock(&bond->curr_slave_lock);
+				bond->curr_active_slave = slave->next;
+				write_unlock(&bond->curr_slave_lock);
+
+				break;
+			}
 		}
+	} else {
+		res = bond_dev_queue_xmit(bond, skb, slave->dev);
 	}
 
-
 out:
 	if (res) {
 		/* no suitable interface, frame not sent */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-19 19:04           ` Andy Gospodarek
@ 2006-10-20 19:41             ` Dawid Ciezarkiewicz
  2006-10-20 19:53               ` Jay Vosburgh
  0 siblings, 1 reply; 12+ messages in thread
From: Dawid Ciezarkiewicz @ 2006-10-20 19:41 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: netdev

On Thursday, 19 October 2006 21:04, Andy Gospodarek wrote:
> It would seem to me that extending an existing mode would be more
> desirable than adding yet another mode to worry about.  I don't even
> like the fact that there are as many as there are, but I understand why
> they are there.  

Ack. I will probably update wrr bonding patch to replace rr mode.

> I recently extended rr mode to allow an additional parameter called that
> rr_repeat that would allow someone to send more than a single frame out
> of each device before moving to the next one.  It seemed this could be
> helpful when dealing with switches that constantly re-learned source MAC
> addresses.  Network performance would suffer whenever rr_repeat was >1,
> but box performance might be better if there weren't so many locks
> taken.

Thanks. I'll consider adding such functionality.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-20 19:41             ` Dawid Ciezarkiewicz
@ 2006-10-20 19:53               ` Jay Vosburgh
  2006-10-20 20:52                 ` Dawid Ciezarkiewicz
  0 siblings, 1 reply; 12+ messages in thread
From: Jay Vosburgh @ 2006-10-20 19:53 UTC (permalink / raw)
  To: Dawid Ciezarkiewicz; +Cc: Andy Gospodarek, netdev

Dawid Ciezarkiewicz <dpc@asn.pl> wrote:

>On Thursday, 19 October 2006 21:04, Andy Gospodarek wrote:
>> It would seem to me that extending an existing mode would be more
>> desirable than adding yet another mode to worry about.  I don't even
>> like the fact that there are as many as there are, but I understand why
>> they are there.  
>
>Ack. I will probably update wrr bonding patch to replace rr mode.

	Also, if acceptance into the mainline is your ultimate goal, the
ioctl control interface will be a very difficult sell.  You'd want to
look into some other control mechanism, most likely an additional sysfs
entry.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-20 19:53               ` Jay Vosburgh
@ 2006-10-20 20:52                 ` Dawid Ciezarkiewicz
  2006-10-20 21:35                   ` Andy Gospodarek
  2006-10-20 21:55                   ` Jay Vosburgh
  0 siblings, 2 replies; 12+ messages in thread
From: Dawid Ciezarkiewicz @ 2006-10-20 20:52 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: Andy Gospodarek, netdev

On Friday, 20 October 2006 21:53, Jay Vosburgh wrote:
> Dawid Ciezarkiewicz <dpc@asn.pl> wrote:
> 
> >On Thursday, 19 October 2006 21:04, Andy Gospodarek wrote:
> >> It would seem to me that extending an existing mode would be more
> >> desirable than adding yet another mode to worry about.  I don't even
> >> like the fact that there are as many as there are, but I understand why
> >> they are there.  
> >
> >Ack. I will probably update wrr bonding patch to replace rr mode.
> 
> 	Also, if acceptance into the mainline is your ultimate goal, the
> ioctl control interface will be a very difficult sell.  You'd want to
> look into some other control mechanism, most likely an additional sysfs
> entry.

Oh. I'm quite puzzled here. What is current policy? I'd like sysfs interfaces 
better than ioctl - they are much cleaner etc. - but I thought ioctl will be 
better here because current bonding control uses ioctl and extending it is 
much simpler.

More generally: should I use sysfs always when adding anything to kernel and 
use it even when extending older functionality?

If wrr bonding have chances for inclusion into mainline I'll do any necessary 
changes to let it meet requirements. I'd like to know if functionality from 
Andy's patch is desired to be a part of such round-robin bonding extension. I 
think I could easily integrate it with wrr.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-20 20:52                 ` Dawid Ciezarkiewicz
@ 2006-10-20 21:35                   ` Andy Gospodarek
  2006-10-20 21:55                   ` Jay Vosburgh
  1 sibling, 0 replies; 12+ messages in thread
From: Andy Gospodarek @ 2006-10-20 21:35 UTC (permalink / raw)
  To: Dawid Ciezarkiewicz; +Cc: Jay Vosburgh, netdev

On Fri, Oct 20, 2006 at 10:52:00PM +0200, Dawid Ciezarkiewicz wrote:
> 
> Oh. I'm quite puzzled here. What is current policy? I'd like sysfs interfaces 
> better than ioctl - they are much cleaner etc. - but I thought ioctl will be 
> better here because current bonding control uses ioctl and extending it is 
> much simpler.

I can't tell you for sure if there is a 'policy' about this, but I would
encourage you to focus energy on sysfs rather than the ioctl interface.
In the future I would guess more will begin to use sysfs rather than
ifenslave.



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC] wrr (weighted round-robin) bonding
  2006-10-20 20:52                 ` Dawid Ciezarkiewicz
  2006-10-20 21:35                   ` Andy Gospodarek
@ 2006-10-20 21:55                   ` Jay Vosburgh
  1 sibling, 0 replies; 12+ messages in thread
From: Jay Vosburgh @ 2006-10-20 21:55 UTC (permalink / raw)
  To: Dawid Ciezarkiewicz; +Cc: Andy Gospodarek, netdev

Dawid Ciezarkiewicz <dpc@asn.pl> wrote:
[...]
>Oh. I'm quite puzzled here. What is current policy? I'd like sysfs interfaces 
>better than ioctl - they are much cleaner etc. - but I thought ioctl will be 
>better here because current bonding control uses ioctl and extending it is 
>much simpler.

	The bonding control stuff is (slowly but surely) moving to
sysfs.  There really isn't anything you can't do to bonding via sysfs
now; the only real issue is that it's difficult to get success / failure
status from a sysfs request.

>More generally: should I use sysfs always when adding anything to kernel and 
>use it even when extending older functionality?

	Well, I can't speak for the kernel as a whole, but for bonding,
I see no reason to add any new functionality to ifenslave at this point
in time, including new ioctl paths into the driver.

>If wrr bonding have chances for inclusion into mainline I'll do any necessary 
>changes to let it meet requirements. I'd like to know if functionality from 
>Andy's patch is desired to be a part of such round-robin bonding extension. I 
>think I could easily integrate it with wrr.

	I'm unfamiliar with Andy's patch (other than his description).
If there's interest in, and justification for, functionality to
manipulate the way the round-robin mode doles out packets it would be
best to have a single, generic dingus rather that two special-purpose
thingies.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2006-10-20 21:55 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-16 18:21 [RFC] wrr (weighted round-robin) bonding Dawid Ciezarkiewicz
2006-10-16 18:27 ` Dawid Ciezarkiewicz
2006-10-16 18:50   ` Jay Vosburgh
2006-10-16 19:07     ` Dawid Ciezarkiewicz
2006-10-16 21:30       ` Andy Gospodarek
2006-10-17  8:16         ` Dawid Ciezarkiewicz
2006-10-19 19:04           ` Andy Gospodarek
2006-10-20 19:41             ` Dawid Ciezarkiewicz
2006-10-20 19:53               ` Jay Vosburgh
2006-10-20 20:52                 ` Dawid Ciezarkiewicz
2006-10-20 21:35                   ` Andy Gospodarek
2006-10-20 21:55                   ` Jay Vosburgh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).