netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves
@ 2009-08-05 16:24 Jiri Bohac
  2009-08-05 16:57 ` Eric Dumazet
  2009-08-05 17:01 ` Jay Vosburgh
  0 siblings, 2 replies; 7+ messages in thread
From: Jiri Bohac @ 2009-08-05 16:24 UTC (permalink / raw)
  To: fubar; +Cc: davem, netdev

Applications exist that broadcast/multicast packets on all
devices in the system (e.g. avahi-mdns).

When a device is an inactive slave of a bond in active-backup
mode, its MAC address may be set identical to other divecies in
the bond. The broadcast/multicast packets may then confuse
switches to direct packets to the inactive slave, rather than the
active one.

This patch makes sure the TX queues on inactive slaves are
deactivated.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>

--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -20,6 +20,7 @@
 #include <linux/if_bonding.h>
 #include <linux/kobject.h>
 #include <linux/in6.h>
+#include <net/sch_generic.h>
 #include "bond_3ad.h"
 #include "bond_alb.h"
 
@@ -291,12 +292,21 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
 	if (slave_do_arp_validate(bond, slave))
 		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
+
+	/* prevent outgoing frames on inactive slaves from confusing switches */
+	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+		dev_deactivate_tx(slave->dev);
 }
 
 static inline void bond_set_slave_active_flags(struct slave *slave)
 {
+	struct bonding *bond = netdev_priv(slave->dev->master);
+
 	slave->state = BOND_STATE_ACTIVE;
 	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
+
+	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+		dev_activate_tx(slave->dev);
 }
 
 static inline void bond_set_master_3ad_flags(struct bonding *bond)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 964ffa0..eee006a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -296,6 +296,8 @@ extern void dev_init_scheduler(struct net_device *dev);
 extern void dev_shutdown(struct net_device *dev);
 extern void dev_activate(struct net_device *dev);
 extern void dev_deactivate(struct net_device *dev);
+extern void dev_activate_tx(struct net_device *dev);
+extern void dev_deactivate_tx(struct net_device *dev);
 extern void qdisc_reset(struct Qdisc *qdisc);
 extern void qdisc_destroy(struct Qdisc *qdisc);
 extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 27d0381..514c73d 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -628,7 +628,7 @@ static void transition_one_qdisc(struct net_device *dev,
 	}
 }
 
-void dev_activate(struct net_device *dev)
+void dev_activate_tx(struct net_device *dev)
 {
 	int need_watchdog;
 
@@ -647,13 +647,19 @@ void dev_activate(struct net_device *dev)
 
 	need_watchdog = 0;
 	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
-	transition_one_qdisc(dev, &dev->rx_queue, NULL);
 
 	if (need_watchdog) {
 		dev->trans_start = jiffies;
 		dev_watchdog_up(dev);
 	}
 }
+EXPORT_SYMBOL(dev_activate_tx);
+
+void dev_activate(struct net_device *dev)
+{
+	dev_activate_tx(dev);
+	transition_one_qdisc(dev, &dev->rx_queue, NULL);
+}
 
 static void dev_deactivate_queue(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
@@ -703,12 +709,18 @@ static bool some_qdisc_is_busy(struct net_device *dev)
 	return false;
 }
 
-void dev_deactivate(struct net_device *dev)
+void dev_deactivate_tx(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
-	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
 
 	dev_watchdog_down(dev);
+}
+EXPORT_SYMBOL(dev_deactivate_tx);
+
+void dev_deactivate(struct net_device *dev)
+{
+	dev_deactivate_tx(dev);
+	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
 
 	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
 	synchronize_rcu();



-- 
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves
  2009-08-05 16:24 [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves Jiri Bohac
@ 2009-08-05 16:57 ` Eric Dumazet
  2009-08-06 10:43   ` Jiri Bohac
  2009-08-05 17:01 ` Jay Vosburgh
  1 sibling, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2009-08-05 16:57 UTC (permalink / raw)
  To: Jiri Bohac; +Cc: fubar, davem, netdev

Jiri Bohac a écrit :
> Applications exist that broadcast/multicast packets on all
> devices in the system (e.g. avahi-mdns).
> 
> When a device is an inactive slave of a bond in active-backup
> mode, its MAC address may be set identical to other divecies in
> the bond. The broadcast/multicast packets may then confuse
> switches to direct packets to the inactive slave, rather than the
> active one.
> 
> This patch makes sure the TX queues on inactive slaves are
> deactivated.
> 

Wont this break ARP link monitoring ?

> Signed-off-by: Jiri Bohac <jbohac@suse.cz>
> 
> --- a/drivers/net/bonding/bonding.h
> +++ b/drivers/net/bonding/bonding.h
> @@ -20,6 +20,7 @@
>  #include <linux/if_bonding.h>
>  #include <linux/kobject.h>
>  #include <linux/in6.h>
> +#include <net/sch_generic.h>
>  #include "bond_3ad.h"
>  #include "bond_alb.h"
>  
> @@ -291,12 +292,21 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
>  	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
>  	if (slave_do_arp_validate(bond, slave))
>  		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
> +
> +	/* prevent outgoing frames on inactive slaves from confusing switches */
> +	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
> +		dev_deactivate_tx(slave->dev);
>  }
>  
>  static inline void bond_set_slave_active_flags(struct slave *slave)
>  {
> +	struct bonding *bond = netdev_priv(slave->dev->master);
> +
>  	slave->state = BOND_STATE_ACTIVE;
>  	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
> +
> +	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
> +		dev_activate_tx(slave->dev);
>  }
>  
>  static inline void bond_set_master_3ad_flags(struct bonding *bond)
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 964ffa0..eee006a 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -296,6 +296,8 @@ extern void dev_init_scheduler(struct net_device *dev);
>  extern void dev_shutdown(struct net_device *dev);
>  extern void dev_activate(struct net_device *dev);
>  extern void dev_deactivate(struct net_device *dev);
> +extern void dev_activate_tx(struct net_device *dev);
> +extern void dev_deactivate_tx(struct net_device *dev);
>  extern void qdisc_reset(struct Qdisc *qdisc);
>  extern void qdisc_destroy(struct Qdisc *qdisc);
>  extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 27d0381..514c73d 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -628,7 +628,7 @@ static void transition_one_qdisc(struct net_device *dev,
>  	}
>  }
>  
> -void dev_activate(struct net_device *dev)
> +void dev_activate_tx(struct net_device *dev)
>  {
>  	int need_watchdog;
>  
> @@ -647,13 +647,19 @@ void dev_activate(struct net_device *dev)
>  
>  	need_watchdog = 0;
>  	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
> -	transition_one_qdisc(dev, &dev->rx_queue, NULL);
>  
>  	if (need_watchdog) {
>  		dev->trans_start = jiffies;
>  		dev_watchdog_up(dev);
>  	}
>  }
> +EXPORT_SYMBOL(dev_activate_tx);
> +
> +void dev_activate(struct net_device *dev)
> +{
> +	dev_activate_tx(dev);
> +	transition_one_qdisc(dev, &dev->rx_queue, NULL);
> +}
>  
>  static void dev_deactivate_queue(struct net_device *dev,
>  				 struct netdev_queue *dev_queue,
> @@ -703,12 +709,18 @@ static bool some_qdisc_is_busy(struct net_device *dev)
>  	return false;
>  }
>  
> -void dev_deactivate(struct net_device *dev)
> +void dev_deactivate_tx(struct net_device *dev)
>  {
>  	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
> -	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
>  
>  	dev_watchdog_down(dev);
> +}
> +EXPORT_SYMBOL(dev_deactivate_tx);
> +
> +void dev_deactivate(struct net_device *dev)
> +{
> +	dev_deactivate_tx(dev);
> +	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
>  
>  	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
>  	synchronize_rcu();
> 
> 
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves
  2009-08-05 16:24 [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves Jiri Bohac
  2009-08-05 16:57 ` Eric Dumazet
@ 2009-08-05 17:01 ` Jay Vosburgh
  2009-08-06 22:56   ` Jiri Bohac
  1 sibling, 1 reply; 7+ messages in thread
From: Jay Vosburgh @ 2009-08-05 17:01 UTC (permalink / raw)
  To: Jiri Bohac; +Cc: davem, netdev

Jiri Bohac <jbohac@suse.cz> wrote:

>Applications exist that broadcast/multicast packets on all
>devices in the system (e.g. avahi-mdns).
>
>When a device is an inactive slave of a bond in active-backup
>mode, its MAC address may be set identical to other divecies in
>the bond. The broadcast/multicast packets may then confuse
>switches to direct packets to the inactive slave, rather than the
>active one.
>
>This patch makes sure the TX queues on inactive slaves are
>deactivated.
>
>Signed-off-by: Jiri Bohac <jbohac@suse.cz>

	I'd love to include this patch; many times I've tracked down
"bonding" problems to some errant dingus confusing the switch, but I
think this patch will break some things, and therefore has to be a NAK.

	Specifically, I suspect this will break users of some protocols
that intentionally (and legitimately) bind directly to the slave
underneath bonding, LLDP for one.  I'm fairly sure there are such users,
because the inactive slave rx path was changed last year to permit
explicit binds to the inactive slaves to receive packets that normally
would be dropped:

commit 0d7a3681232f545c6a59f77e60f7667673ef0e93
Author: Joe Eykholt <jre@nuovasystems.com>
Date:   Wed Jul 2 18:22:01 2008 -0700

    net/core: Allow certain receives on inactive slave.
    
    Allow a packet_type that specifies the exact device to receive
    even on an inactive bonding slave devices.  This is important for some
    L2 protocols such as LLDP and FCoE.  This can eventually be used
    for the bonding special cases as well.
    
    Signed-off-by: Joe Eykholt <jre@nuovasystems.com>
    Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
    Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

	The fact that they're receiving on the inactive slave suggests
that there may be transmits on the same slave, and a quick read of the
LLDP spec seems to agree.  I'm also unsure of exactly how FCoE operates
in this regard (whether it does anything that will break due to this
patch).

	Anybody have better information about LLDP or FCoE?

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves
  2009-08-05 16:57 ` Eric Dumazet
@ 2009-08-06 10:43   ` Jiri Bohac
  2009-08-06 12:28     ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Jiri Bohac @ 2009-08-06 10:43 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Jiri Bohac, fubar, davem, netdev

On Wed, Aug 05, 2009 at 06:57:37PM +0200, Eric Dumazet wrote:
> Jiri Bohac a écrit :
> > This patch makes sure the TX queues on inactive slaves are
> > deactivated.
> > 
> 
> Wont this break ARP link monitoring ?

No, ARP monitoring sends the ARP requests from the active slave.

-- 
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves
  2009-08-06 10:43   ` Jiri Bohac
@ 2009-08-06 12:28     ` Eric Dumazet
  0 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2009-08-06 12:28 UTC (permalink / raw)
  To: Jiri Bohac; +Cc: fubar, davem, netdev

Jiri Bohac a écrit :
> On Wed, Aug 05, 2009 at 06:57:37PM +0200, Eric Dumazet wrote:
>> Jiri Bohac a écrit :
>>> This patch makes sure the TX queues on inactive slaves are
>>> deactivated.
>>>
>> Wont this break ARP link monitoring ?
> 
> No, ARP monitoring sends the ARP requests from the active slave.
> 

Yes, this makes sense now you say it :)

Thanks

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves
  2009-08-05 17:01 ` Jay Vosburgh
@ 2009-08-06 22:56   ` Jiri Bohac
       [not found]     ` <20090819155509.GA11829@midget.suse.cz>
  0 siblings, 1 reply; 7+ messages in thread
From: Jiri Bohac @ 2009-08-06 22:56 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: Jiri Bohac, davem, netdev

On Wed, Aug 05, 2009 at 10:01:16AM -0700, Jay Vosburgh wrote:
> Jiri Bohac <jbohac@suse.cz> wrote:
> >This patch makes sure the TX queues on inactive slaves are
> >deactivated.
> 
> 	I'd love to include this patch; many times I've tracked down
> "bonding" problems to some errant dingus confusing the switch, but I
> think this patch will break some things, and therefore has to be a NAK.
> 
> 	Specifically, I suspect this will break users of some protocols
> that intentionally (and legitimately) bind directly to the slave
> underneath bonding, LLDP for one.  I'm fairly sure there are such users,
> because the inactive slave rx path was changed last year to permit
> explicit binds to the inactive slaves to receive packets that normally
> would be dropped:
> 
> commit 0d7a3681232f545c6a59f77e60f7667673ef0e93
> Author: Joe Eykholt <jre@nuovasystems.com>
> Date:   Wed Jul 2 18:22:01 2008 -0700
> 
>     net/core: Allow certain receives on inactive slave.
>     
>     Allow a packet_type that specifies the exact device to receive
>     even on an inactive bonding slave devices.  This is important for some
>     L2 protocols such as LLDP and FCoE.  This can eventually be used
>     for the bonding special cases as well.

OK. I checked FCoE and it really seems to bind to a specific
interface. I checked openlldp and it does bind to individual
interfaces specifically, so checking the ptype really seems like
it should work. How about the following patch, then?

I think it even is cleaner than the original, because
bond_set_slave_active_flags() really only sets flags instead of
calling non-trivial functions while holding locks. If some
protocol does not work with the ptype heuristics, it can easilly
be "whitelisted" in skb_bond_should_drop_tx():


Signed-off-by: Jiri Bohac <jbohac@suse.cz>


--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1955,7 +1955,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
 				   IFF_SLAVE_INACTIVE | IFF_BONDING |
-				   IFF_SLAVE_NEEDARP);
+				   IFF_SLAVE_NEEDARP | IFF_SLAVE_FILTER_TX);
 
 	kfree(slave);
 
@@ -2081,7 +2081,7 @@ static int bond_release_all(struct net_device *bond_dev)
 		}
 
 		slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
-					   IFF_SLAVE_INACTIVE);
+					   IFF_SLAVE_INACTIVE | IFF_SLAVE_FILTER_TX);
 
 		kfree(slave);
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 6290a50..7d0e0bb 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -291,12 +291,15 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
 	if (slave_do_arp_validate(bond, slave))
 		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
+	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+		slave->dev->priv_flags |= IFF_SLAVE_FILTER_TX;
 }
 
 static inline void bond_set_slave_active_flags(struct slave *slave)
 {
 	slave->state = BOND_STATE_ACTIVE;
-	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
+	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP |
+				    IFF_SLAVE_FILTER_TX);
 }
 
 static inline void bond_set_master_3ad_flags(struct bonding *bond)
diff --git a/include/linux/if.h b/include/linux/if.h
index b9a6229..40d5c56 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -70,6 +70,7 @@
 #define IFF_XMIT_DST_RELEASE 0x400	/* dev_hard_start_xmit() is allowed to
 					 * release skb->dst
 					 */
+#define IFF_SLAVE_FILTER_TX 0x800	/* filter tx on bonding slaves	*/
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/net/core/dev.c b/net/core/dev.c
index 70c27e0..7018ba7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1786,6 +1786,25 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	return netdev_get_tx_queue(dev, queue_index);
 }
 
+/* In active-backup mode, on bonding slaves other than the currently active slave,
+ * suppress outgoing packets, except for special L2 protocols.
+ */
+static inline int skb_bond_should_drop_tx(struct sk_buff *skb)
+{
+	struct packet_type *ptype;
+	__be16 type;
+
+	/* allow protocols specifically bound to this interface */
+	type = skb->protocol;
+	list_for_each_entry_rcu(ptype,
+			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
+		if (ptype->type == type && ptype->dev == skb->dev)
+			return 0;
+	}
+
+	return 1;
+}
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -1818,6 +1837,12 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
+	if ((dev->priv_flags & IFF_SLAVE_FILTER_TX) &&
+	    skb_bond_should_drop_tx(skb)) {
+		rc = NET_XMIT_DROP;
+		goto out_kfree_skb;
+	}
+
 	/* GSO will handle the following emulations directly. */
 	if (netif_needs_gso(dev, skb))
 		goto gso;

-- 
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves
       [not found]           ` <10377.1251221782@death.nxdomain.ibm.com>
@ 2009-09-17 11:14             ` Jiri Bohac
  0 siblings, 0 replies; 7+ messages in thread
From: Jiri Bohac @ 2009-09-17 11:14 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: Jiri Bohac, davem, netdev

Hi,

after a small off-list (my fault, sorry) discussion with Jay, I am re-sending the
patch with a minor modification. See below.

On Tue, Aug 25, 2009 at 10:36:22AM -0700, Jay Vosburgh wrote (off-list):
> Jiri Bohac <jbohac@suse.cz> wrote:
> >+	if (unlikely(dev->priv_flags & IFF_SLAVE_FILTER_TX) &&
> >+	    skb_bond_should_drop_tx(skb)) {
> >+		rc = NET_XMIT_DROP;
> >+		goto out_kfree_skb;
> >+	}
> >+
> 
> 	The priv_flags test is hidden inside the skb_bond_should_drop
> that already exists, I see no reason to do this differently.  The
> function is an inline, so in terms of the generated code, it should be
> about the same.

OK, fixed in the new version below.


> 	Also, your patch won't prevent a VLAN configured directly above
> the slave from transmitting.  I mention this because I've occasionally seen
> configurations of the form:
> 
> 	bond0 -> eth0.555 -> eth0
> 
> 	I.e., where the bonding slave is the VLAN interface, not the
> actual interface.  The other bonding slave was on a different VLAN, if
> memory serves.  I don't know if this is really an issue or not for your
> purpose.

Right, the patch won't prevent transmission from the real device
on which a VLAN is configured. After some thinking, however, I am now
convinced this is the right thing to do:

1) the problem can be prevented when setting up the bond.  VLAN
interfaces can have their MAC address changed, independently from
the real device the VLAN is configured on (and from any other VLAN
interfaces). The problem occurs when a VLAN interface, with a MAC
address identical to the real device (or other VLAN interfaces)
is added as the first slave to the bond, making the bond inherit
this address and force it to subsequently enslaved devices. If a
slave, other than the first VLAN, is then made the active slave,
switches could be confused. The VLAN's MAC address can, however,
easily be changed prior to enslaving the VLAN interface and the
problem will then never occur.

2) filtering outgoing frames from the VLAN's real device could
break legitimate traffic. If the network topology ensures that
non-tagged (or tagged with a different VLAN id) frames going out
from the VLAN interface never get on the same L2 network as the
frames from the other bonding slaves, the setup can work well
and filtering the frames will break that.


Signed-off-by: Jiri Bohac <jbohac@suse.cz>

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index aa1be1f..56b8a8e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1955,7 +1955,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
 				   IFF_SLAVE_INACTIVE | IFF_BONDING |
-				   IFF_SLAVE_NEEDARP);
+				   IFF_SLAVE_NEEDARP | IFF_SLAVE_FILTER_TX);
 
 	kfree(slave);
 
@@ -2081,7 +2081,7 @@ static int bond_release_all(struct net_device *bond_dev)
 		}
 
 		slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
-					   IFF_SLAVE_INACTIVE);
+					   IFF_SLAVE_INACTIVE | IFF_SLAVE_FILTER_TX);
 
 		kfree(slave);
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 6290a50..7d0e0bb 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -291,12 +291,15 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
 	if (slave_do_arp_validate(bond, slave))
 		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
+	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+		slave->dev->priv_flags |= IFF_SLAVE_FILTER_TX;
 }
 
 static inline void bond_set_slave_active_flags(struct slave *slave)
 {
 	slave->state = BOND_STATE_ACTIVE;
-	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
+	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP |
+				    IFF_SLAVE_FILTER_TX);
 }
 
 static inline void bond_set_master_3ad_flags(struct bonding *bond)
diff --git a/include/linux/if.h b/include/linux/if.h
index b9a6229..40d5c56 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -70,6 +70,7 @@
 #define IFF_XMIT_DST_RELEASE 0x400	/* dev_hard_start_xmit() is allowed to
 					 * release skb->dst
 					 */
+#define IFF_SLAVE_FILTER_TX 0x800	/* filter tx on bonding slaves	*/
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/net/core/dev.c b/net/core/dev.c
index 6a94475..2d92c93 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1786,6 +1786,28 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	return netdev_get_tx_queue(dev, queue_index);
 }
 
+/* In active-backup mode, on bonding slaves other than the currently active slave,
+ * suppress outgoing packets, except for special L2 protocols.
+ */
+static inline int skb_bond_should_drop_tx(struct sk_buff *skb)
+{
+	struct packet_type *ptype;
+	__be16 type;
+
+	if (likely(!(skb->dev->priv_flags & IFF_SLAVE_FILTER_TX)))
+		return 0;
+
+	/* allow protocols specifically bound to this interface */
+	type = skb->protocol;
+	list_for_each_entry_rcu(ptype,
+			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
+		if (ptype->type == type && ptype->dev == skb->dev)
+			return 0;
+	}
+
+	return 1;
+}
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -1818,6 +1840,11 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
+	if (skb_bond_should_drop_tx(skb)) {
+		rc = NET_XMIT_DROP;
+		goto out_kfree_skb;
+	}
+
 	/* GSO will handle the following emulations directly. */
 	if (netif_needs_gso(dev, skb))
 		goto gso;


-- 
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-09-17 11:14 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-08-05 16:24 [PATCH, RFC] bonding: prevent outgoing packets on inactive slaves Jiri Bohac
2009-08-05 16:57 ` Eric Dumazet
2009-08-06 10:43   ` Jiri Bohac
2009-08-06 12:28     ` Eric Dumazet
2009-08-05 17:01 ` Jay Vosburgh
2009-08-06 22:56   ` Jiri Bohac
     [not found]     ` <20090819155509.GA11829@midget.suse.cz>
     [not found]       ` <2495.1250729618@death.nxdomain.ibm.com>
     [not found]         ` <20090825133701.GC23138@midget.suse.cz>
     [not found]           ` <10377.1251221782@death.nxdomain.ibm.com>
2009-09-17 11:14             ` Jiri Bohac

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).