* bonding: bug in balance-alb mode (incorrect update-ARP-replies)
@ 2007-01-23 17:52 JUNG, Christian
2007-01-25 1:27 ` Jay Vosburgh
2009-07-20 17:12 ` Franck Chionna
0 siblings, 2 replies; 3+ messages in thread
From: JUNG, Christian @ 2007-01-23 17:52 UTC (permalink / raw)
To: 'netdev@vger.kernel.org'
Hello,
I've discovered a bug in the bonding module of the Linux Kernel, which
appears
only in bonding-mode balance-alb.
Description:
You have to setup a box with at least two NICs, a bonding device
enslaving
those, assign at least two IPs to the bond and make some traffic from a
different machine to one of those IPs.
If you delete that IP, the box will nevertheless keep sending ARP replies to
the machine which communicated with that IP before it was removed.
This comes from the rx_hashtbl and the receive load balancing algorithm.
The bug is very serious if bonding is used in a cluster environment using
two nodes which are connected to the same subnet. If an IP-bound service has
to fail over to the other node, the old node would still announce its
MAC address for the IP which isn't owned by that node anymore. So client
traffic in the same net would hit the old node.
A possible workaround could be the usage of balance-tlb instead of
balance-alb.
I've made a little patch which removes every entry from the rx_hashtbl if
the corresponding IP is removed from the bond. The patch was made for Linux
Kernel version 2.6.19.
---8<---
diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.c
linux/drivers/net/bonding/bond_alb.c
--- linux-2.6.19/drivers/net/bonding/bond_alb.c 2006-11-29
22:57:37.000000000 +0100
+++ linux/drivers/net/bonding/bond_alb.c 2007-01-16
17:23:53.000000000 +0100
@@ -1677,3 +1677,38 @@
}
}
+void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip) {
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 curr_index;
+
+ dprintk("%s: removing entries from rx_hashtbl for IP %lx\n",
bond->dev->name, ip);
+ _lock_rx_hashtbl(bond);
+
+ curr_index = bond_info->rx_hashtbl_head;
+ while (curr_index != RLB_NULL_INDEX) {
+ struct rlb_client_info *curr =
&(bond_info->rx_hashtbl[curr_index]);
+ u32 next_index = bond_info->rx_hashtbl[curr_index].next;
+ u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
+
+ if (curr->ip_src == ip) {
+ dprintk("%s: entry %u matched\n", bond->dev->name,
curr_index);
+
+ if (curr_index == bond_info->rx_hashtbl_head) {
+ bond_info->rx_hashtbl_head = next_index;
+ }
+ if (prev_index != RLB_NULL_INDEX) {
+ bond_info->rx_hashtbl[prev_index].next =
next_index;
+ }
+ if (next_index != RLB_NULL_INDEX) {
+ bond_info->rx_hashtbl[next_index].prev =
prev_index;
+ }
+
+ rlb_init_table_entry(curr);
+ }
+
+ curr_index = next_index;
+ }
+
+ _unlock_rx_hashtbl(bond);
+}
+
diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.h
linux/drivers/net/bonding/bond_alb.h
--- linux-2.6.19/drivers/net/bonding/bond_alb.h 2006-11-29
22:57:37.000000000 +0100
+++ linux/drivers/net/bonding/bond_alb.h 2007-01-16
17:23:53.000000000 +0100
@@ -128,5 +128,6 @@
void bond_alb_monitor(struct bonding *bond);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
+void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip);
#endif /* __BOND_ALB_H__ */
diff -ur linux-2.6.19/drivers/net/bonding/bond_main.c
linux/drivers/net/bonding/bond_main.c
--- linux-2.6.19/drivers/net/bonding/bond_main.c 2006-11-29
22:57:37.000000000 +0100
+++ linux/drivers/net/bonding/bond_main.c 2007-01-16
17:30:49.000000000 +0100
@@ -3356,6 +3356,12 @@
return NOTIFY_OK;
case NETDEV_DOWN:
bond->master_ip =
bond_glean_dev_ip(bond->dev);
+
+ /* remove IP from RLB hashtable if using
balance-alb mode: */
+ if (bond->params.mode == BOND_MODE_ALB) {
+ bond_alb_remove_ip_from_rlb(bond,
ifa->ifa_local);
+ }
+
return NOTIFY_OK;
default:
return NOTIFY_DONE;
---8<---
The function bond_alb_remove_ip_from_rlb is heavily based on the function
rlb_clear_vlan.
And here's a useful patch for debugging purposes (it outputs the rx_hashtbl
in
the proc-file of the bond):
---8<---
diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.c
linux/drivers/net/bonding/bond_alb.c
--- linux-2.6.19/drivers/net/bonding/bond_alb.c 2007-01-16
18:59:32.000000000 +0100
+++ linux/drivers/net/bonding/bond_alb.c 2007-01-16
18:48:15.000000000 +0100
@@ -26,6 +26,7 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/pkt_sched.h>
+#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/timer.h>
@@ -1677,6 +1678,45 @@
}
}
+void bond_alb_info_show(struct seq_file *seq) {
+ struct bonding *bond = seq->private;
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *rx_hash_table;
+ u32 index;
+ u32 src, dst;
+
+ seq_puts(seq, "\nALB info\n\n");
+ seq_puts(seq, " Receive Load Balancing table:\n\n");
+ seq_puts(seq, " Index Slave Server Client
Client-MAC Asgnd\n");
+
+ _lock_rx_hashtbl(bond);
+
+ rx_hash_table = bond_info->rx_hashtbl;
+
+ if (rx_hash_table != NULL) {
+ for (index = bond_info->rx_hashtbl_head;
+ index != RLB_NULL_INDEX;
+ index = rx_hash_table[index].next) {
+ src = ntohl(rx_hash_table[index].ip_src);
+ dst = ntohl(rx_hash_table[index].ip_dst);
+
+ seq_printf(seq,
+ " %03u %8s %03u.%03u.%03u.%03u
%03u.%03u.%03u.%03u %02x:%02x:%02x:%02x:%02x:%02x %3s\n",
+ index,
+ (rx_hash_table[index].slave != NULL
? rx_hash_table[index].slave->dev->name : "none"),
+ ((src >> 24) & 0xff), ((src >> 16) &
0xff), ((src >> 8) & 0xff), (src & 0xff),
+ ((dst >> 24) & 0xff), ((dst >> 16) &
0xff), ((dst >> 8) & 0xff), (dst & 0xff),
+ rx_hash_table[index].mac_dst[0],
rx_hash_table[index].mac_dst[1],
+ rx_hash_table[index].mac_dst[2],
rx_hash_table[index].mac_dst[3],
+ rx_hash_table[index].mac_dst[4],
rx_hash_table[index].mac_dst[5],
+ (rx_hash_table[index].assigned ?
"yes" : "no")
+ );
+ }
+ }
+
+ _unlock_rx_hashtbl(bond);
+}
+
void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip) {
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
u32 curr_index;
diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.h
linux/drivers/net/bonding/bond_alb.h
--- linux-2.6.19/drivers/net/bonding/bond_alb.h 2007-01-16
18:59:32.000000000 +0100
+++ linux/drivers/net/bonding/bond_alb.h 2007-01-16
19:01:46.000000000 +0100
@@ -128,6 +128,7 @@
void bond_alb_monitor(struct bonding *bond);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
+void bond_alb_info_show(struct seq_file *seq);
void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip);
#endif /* __BOND_ALB_H__ */
diff -ur linux-2.6.19/drivers/net/bonding/bond_main.c
linux/drivers/net/bonding/bond_main.c
--- linux-2.6.19/drivers/net/bonding/bond_main.c 2007-01-16
18:59:32.000000000 +0100
+++ linux/drivers/net/bonding/bond_main.c 2007-01-16
18:48:15.000000000 +0100
@@ -3048,6 +3048,10 @@
ad_info.partner_system[5]);
}
}
+ else
+ if (bond->params.mode == BOND_MODE_ALB) {
+ bond_alb_info_show(seq);
+ }
}
static void bond_info_show_slave(struct seq_file *seq, const struct slave
*slave)
---8<---
I attach this example to visualize the bug. The box is named 'linux' (which
has
the two IPs 10.0.91.128 and 10.0.91.129) and the other machine (which makes
some traffic) is called 'dave'. Their clocks are synchronized via NTP.
---8<---
linux:~ # modprobe bonding miimon=100 updelay=200 mode=balance-alb
use_carrier=0
linux:~ # ifconfig bond0 10.0.91.128 netmask 255.255.255.0 up
linux:~ # ifenslave bond0 eth1
linux:~ # ifenslave bond0 eth2
linux:~ # ip addr add 10.0.91.129 dev bond0
linux:~ # ip addr sh bond0
18: bond0: <BROADCAST,MULTICAST,MASTER,UP> mtu 1500 qdisc noqueue
link/ether 00:02:b3:55:2e:b1 brd ff:ff:ff:ff:ff:ff
inet 10.0.91.128/24 brd 10.255.255.255 scope global bond0
inet 10.0.91.129/32 scope global bond0
inet6 fe80::200:ff:fe00:0/64 scope link
valid_lft forever preferred_lft forever
---
dave:~ # ping 10.0.91.129
PING 10.0.91.129 (10.0.91.129) 56(84) bytes of data.
64 bytes from 10.0.91.129: icmp_seq=1 ttl=64 time=3.83 ms
64 bytes from 10.0.91.129: icmp_seq=2 ttl=64 time=0.205 ms
[...]
dave:~ # tcpdump -i bond0 arp host 10.0.91.129
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on bond0, link-type EN10MB (Ethernet), capture size 96 bytes
11:55:41.829735 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui Unknown)
11:55:41.830993 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui Unknown)
11:55:44.047261 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui Unknown)
11:55:44.047276 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui Unknown)
[...]
---
linux:~ # ip addr del 10.0.91.129 dev bond0
linux:~ # ip addr sh bond0
18: bond0: <BROADCAST,MULTICAST,MASTER,UP> mtu 1500 qdisc noqueue
link/ether 00:02:b3:55:2e:b1 brd ff:ff:ff:ff:ff:ff
inet 10.0.91.128/24 brd 10.255.255.255 scope global bond0
inet6 fe80::200:ff:fe00:0/64 scope link
valid_lft forever preferred_lft forever
linux:~ # date
Tue Jan 16 11:55:57 CET 2007
---
dave:~ # date
Tue Jan 16 11:56:59 CET 2007
dave:~ # tcpdump -i bond0 arp host 10.0.91.129
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on bond0, link-type EN10MB (Ethernet), capture size 96 bytes
11:57:04.305078 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui Unknown)
11:57:04.306248 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui Unknown)
11:57:06.704552 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui Unknown)
11:57:06.704569 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui Unknown)
[...]
---8<---
Bye
Christian Jung
PS I'm sorry but I have to use a mailer which has some handicaps. If the
whitespace of the patches is munged in any way, I can send you the patches
as attachments.
Another thing: When shutting down a bond (e.g. ifconfig bond0 0.0.0.0 down)
the
slaves keep the master IP address of the bond. Is there a special reason for
this behaviour?
phone: +49 6898/10-4987
fax: +49 6898/10-54987
http://www.saarstahl.de
^ permalink raw reply [flat|nested] 3+ messages in thread* Re: bonding: bug in balance-alb mode (incorrect update-ARP-replies)
2007-01-23 17:52 bonding: bug in balance-alb mode (incorrect update-ARP-replies) JUNG, Christian
@ 2007-01-25 1:27 ` Jay Vosburgh
2009-07-20 17:12 ` Franck Chionna
1 sibling, 0 replies; 3+ messages in thread
From: Jay Vosburgh @ 2007-01-25 1:27 UTC (permalink / raw)
To: JUNG, Christian; +Cc: 'netdev@vger.kernel.org'
JUNG, Christian <christian.jung@saarstahl.com> wrote:
> You have to setup a box with at least two NICs, a bonding device
>enslaving
> those, assign at least two IPs to the bond and make some traffic from a
> different machine to one of those IPs.
>
> If you delete that IP, the box will regardlessly send ARP-replies to the
> machine which communicated to that IP before removing it.
I've fooled around with this for a while today, and I see
generically what you're describing: after a local IP address is removed,
the corresponding entries in the bonding receive balance hash table are
not removed. However, I'm not able to induce the ill effects you describe
from this (following the script you supplied); I don't see the
(apparently) unsolicited ARP replies you show in your tcpdump traces.
I do see some other weirdness when the receive balance hash
table becomes heavily populated, but nothing that is an ARP reply for an
address not assigned to the system.
Is your test occurring on an isolated network, and is there other
concurrent network traffic that might be affecting things?
-J
---
-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: bonding: bug in balance-alb mode (incorrect update-ARP-replies)
2007-01-23 17:52 bonding: bug in balance-alb mode (incorrect update-ARP-replies) JUNG, Christian
2007-01-25 1:27 ` Jay Vosburgh
@ 2009-07-20 17:12 ` Franck Chionna
1 sibling, 0 replies; 3+ messages in thread
From: Franck Chionna @ 2009-07-20 17:12 UTC (permalink / raw)
To: netdev
JUNG, Christian wrote:
>
> Hello,
>
> I've discovered a bug in the bonding module of the Linux Kernel, which
> appears
> only in bonding-mode balance-alb.
>
> Description:
>
> You have to setup a box with at least two NICs, a bonding device
> enslaving
> those, assign at least two IPs to the bond and make some traffic from
> a
> different machine to one of those IPs.
>
> If you delete that IP, the box will regardlessly send ARP-replies to
> the
> machine which communicated to that IP before removing it.
>
> This comes from the rx_hashtbl and the receive load balancing
> algorithm.
>
> The bug is very serious if bonding is used in a cluster-environment
> using
> two nodes which are connected to the same subnet. If an IP-bound
> service
> has
> to failover to the other node, the old node would announce its
> MAC-address
> for the IP which isn't owned by the node anymore. So client-traffic in
> the
> same net would hit the old node.
>
> A possible workaround could be the usage of balance-tlb instead of
> balance-alb.
>
> I've made a little patch which removes every entry from the rx_hashtbl, if
> the
> according IP is removed from the bond. The patch was made for Linux Kernel
> version 2.6.19.
>
> ---8<---
> diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.c
> linux/drivers/net/bonding/bond_alb.c
> --- linux-2.6.19/drivers/net/bonding/bond_alb.c 2006-11-29
> 22:57:37.000000000 +0100
> +++ linux/drivers/net/bonding/bond_alb.c 2007-01-16
> 17:23:53.000000000 +0100
> @@ -1677,3 +1677,38 @@
> }
> }
>
> +void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip) {
> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> + u32 curr_index;
> +
> + dprintk("%s: removing entries from rx_hashtbl for IP %lx\n",
> bond->dev->name, ip);
> + _lock_rx_hashtbl(bond);
> +
> + curr_index = bond_info->rx_hashtbl_head;
> + while (curr_index != RLB_NULL_INDEX) {
> + struct rlb_client_info *curr =
> &(bond_info->rx_hashtbl[curr_index]);
> + u32 next_index = bond_info->rx_hashtbl[curr_index].next;
> + u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
> +
> + if (curr->ip_src == ip) {
> + dprintk("%s: entry %u matched\n", bond->dev->name,
> curr_index);
> +
> + if (curr_index == bond_info->rx_hashtbl_head) {
> + bond_info->rx_hashtbl_head = next_index;
> + }
> + if (prev_index != RLB_NULL_INDEX) {
> + bond_info->rx_hashtbl[prev_index].next =
> next_index;
> + }
> + if (next_index != RLB_NULL_INDEX) {
> + bond_info->rx_hashtbl[next_index].prev =
> prev_index;
> + }
> +
> + rlb_init_table_entry(curr);
> + }
> +
> + curr_index = next_index;
> + }
> +
> + _unlock_rx_hashtbl(bond);
> +}
> +
> diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.h
> linux/drivers/net/bonding/bond_alb.h
> --- linux-2.6.19/drivers/net/bonding/bond_alb.h 2006-11-29
> 22:57:37.000000000 +0100
> +++ linux/drivers/net/bonding/bond_alb.h 2007-01-16
> 17:23:53.000000000 +0100
> @@ -128,5 +128,6 @@
> void bond_alb_monitor(struct bonding *bond);
> int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
> void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
> +void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip);
> #endif /* __BOND_ALB_H__ */
>
> diff -ur linux-2.6.19/drivers/net/bonding/bond_main.c
> linux/drivers/net/bonding/bond_main.c
> --- linux-2.6.19/drivers/net/bonding/bond_main.c 2006-11-29
> 22:57:37.000000000 +0100
> +++ linux/drivers/net/bonding/bond_main.c 2007-01-16
> 17:30:49.000000000 +0100
> @@ -3356,6 +3356,12 @@
> return NOTIFY_OK;
> case NETDEV_DOWN:
> bond->master_ip =
> bond_glean_dev_ip(bond->dev);
> +
> + /* remove IP from RLB hashtable if using
> balance-alb mode: */
> + if (bond->params.mode == BOND_MODE_ALB) {
> + bond_alb_remove_ip_from_rlb(bond,
> ifa->ifa_local);
> + }
> +
> return NOTIFY_OK;
> default:
> return NOTIFY_DONE;
> ---8<---
>
> The function bond_alb_remove_ip_from_rlb is heavily based on the function
> rlb_clear_vlan.
>
> And here's a useful patch for debugging purposes (it outputs the
> rx_hashtbl
> in
> the proc-file of the bond):
>
> ---8<---
> diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.c
> linux/drivers/net/bonding/bond_alb.c
> --- linux-2.6.19/drivers/net/bonding/bond_alb.c 2007-01-16
> 18:59:32.000000000 +0100
> +++ linux/drivers/net/bonding/bond_alb.c 2007-01-16
> 18:48:15.000000000 +0100
> @@ -26,6 +26,7 @@
> #include <linux/netdevice.h>
> #include <linux/etherdevice.h>
> #include <linux/pkt_sched.h>
> +#include <linux/seq_file.h>
> #include <linux/spinlock.h>
> #include <linux/slab.h>
> #include <linux/timer.h>
> @@ -1677,6 +1678,45 @@
> }
> }
>
> +void bond_alb_info_show(struct seq_file *seq) {
> + struct bonding *bond = seq->private;
> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> + struct rlb_client_info *rx_hash_table;
> + u32 index;
> + u32 src, dst;
> +
> + seq_puts(seq, "\nALB info\n\n");
> + seq_puts(seq, " Receive Load Balancing table:\n\n");
> + seq_puts(seq, " Index Slave Server Client
> Client-MAC Asgnd\n");
> +
> + _lock_rx_hashtbl(bond);
> +
> + rx_hash_table = bond_info->rx_hashtbl;
> +
> + if (rx_hash_table != NULL) {
> + for (index = bond_info->rx_hashtbl_head;
> + index != RLB_NULL_INDEX;
> + index = rx_hash_table[index].next) {
> + src = ntohl(rx_hash_table[index].ip_src);
> + dst = ntohl(rx_hash_table[index].ip_dst);
> +
> + seq_printf(seq,
> + " %03u %8s %03u.%03u.%03u.%03u
> %03u.%03u.%03u.%03u %02x:%02x:%02x:%02x:%02x:%02x %3s\n",
> + index,
> + (rx_hash_table[index].slave != NULL
> ? rx_hash_table[index].slave->dev->name : "none"),
> + ((src >> 24) & 0xff), ((src >> 16) &
> 0xff), ((src >> 8) & 0xff), (src & 0xff),
> + ((dst >> 24) & 0xff), ((dst >> 16) &
> 0xff), ((dst >> 8) & 0xff), (dst & 0xff),
> + rx_hash_table[index].mac_dst[0],
> rx_hash_table[index].mac_dst[1],
> + rx_hash_table[index].mac_dst[2],
> rx_hash_table[index].mac_dst[3],
> + rx_hash_table[index].mac_dst[4],
> rx_hash_table[index].mac_dst[5],
> + (rx_hash_table[index].assigned ?
> "yes" : "no")
> + );
> + }
> + }
> +
> + _unlock_rx_hashtbl(bond);
> +}
> +
> void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip) {
> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> u32 curr_index;
> diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.h
> linux/drivers/net/bonding/bond_alb.h
> --- linux-2.6.19/drivers/net/bonding/bond_alb.h 2007-01-16
> 18:59:32.000000000 +0100
> +++ linux/drivers/net/bonding/bond_alb.h 2007-01-16
> 19:01:46.000000000 +0100
> @@ -128,6 +128,7 @@
> void bond_alb_monitor(struct bonding *bond);
> int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
> void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
> +void bond_alb_info_show(struct seq_file *seq);
> void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip);
> #endif /* __BOND_ALB_H__ */
>
> diff -ur linux-2.6.19/drivers/net/bonding/bond_main.c
> linux/drivers/net/bonding/bond_main.c
> --- linux-2.6.19/drivers/net/bonding/bond_main.c 2007-01-16
> 18:59:32.000000000 +0100
> +++ linux/drivers/net/bonding/bond_main.c 2007-01-16
> 18:48:15.000000000 +0100
> @@ -3048,6 +3048,10 @@
> ad_info.partner_system[5]);
> }
> }
> + else
> + if (bond->params.mode == BOND_MODE_ALB) {
> + bond_alb_info_show(seq);
> + }
> }
>
> static void bond_info_show_slave(struct seq_file *seq, const struct slave
> *slave)
> ---8<---
>
> I attach this example to visualize the bug. The box is named 'linux'
> (which
> has
> the two IPs 10.0.91.128 and 10.0.91.129) and the other machine (which
> makes
> some traffic) is called 'dave'. Their clocks are synchronized via NTP.
>
> ---8<---
> linux:~ # modprobe bonding miimon=100 updelay=200 mode=balance-alb
> use_carrier=0
> linux:~ # ifconfig bond0 10.0.91.128 netmask 255.255.255.0 up
> linux:~ # ifenslave bond0 eth1
> linux:~ # ifenslave bond0 eth2
> linux:~ # ip addr add 10.0.91.129 dev bond0
> linux:~ # ip addr sh bond0
> 18: bond0: <BROADCAST,MULTICAST,MASTER,UP> mtu 1500 qdisc noqueue
> link/ether 00:02:b3:55:2e:b1 brd ff:ff:ff:ff:ff:ff
> inet 10.0.91.128/24 brd 10.255.255.255 scope global bond0
> inet 10.0.91.129/32 scope global bond0
> inet6 fe80::200:ff:fe00:0/64 scope link
> valid_lft forever preferred_lft forever
> ---
>
> dave:~ # ping 10.0.91.129
> PING 10.0.91.129 (10.0.91.129) 56(84) bytes of data.
> 64 bytes from 10.0.91.129: icmp_seq=1 ttl=64 time=3.83 ms
> 64 bytes from 10.0.91.129: icmp_seq=2 ttl=64 time=0.205 ms
> [...]
> dave:~ # tcpdump -i bond0 arp host 10.0.91.129
> tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
> listening on bond0, link-type EN10MB (Ethernet), capture size 96 bytes
> 11:55:41.829735 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui
> Unknown)
> 11:55:41.830993 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui
> Unknown)
> 11:55:44.047261 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui
> Unknown)
> 11:55:44.047276 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui
> Unknown)
> [...]
>
> ---
>
> linux:~ # ip addr del 10.0.91.129 dev bond0
> linux:~ # ip addr sh bond0
> 18: bond0: <BROADCAST,MULTICAST,MASTER,UP> mtu 1500 qdisc noqueue
> link/ether 00:02:b3:55:2e:b1 brd ff:ff:ff:ff:ff:ff
> inet 10.0.91.128/24 brd 10.255.255.255 scope global bond0
> inet6 fe80::200:ff:fe00:0/64 scope link
> valid_lft forever preferred_lft forever
> linux:~ # date
> Tue Jan 16 11:55:57 CET 2007
>
> ---
>
> dave:~ # date
> Tue Jan 16 11:56:59 CET 2007
> dave:~ # tcpdump -i bond0 arp host 10.0.91.129
> tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
> listening on bond0, link-type EN10MB (Ethernet), capture size 96 bytes
> 11:57:04.305078 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui
> Unknown)
> 11:57:04.306248 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui
> Unknown)
> 11:57:06.704552 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui
> Unknown)
> 11:57:06.704569 arp reply 10.0.91.129 is-at 00:02:b3:55:2e:b1 (oui
> Unknown)
> [...]
> ---8<---
>
>
> Bye
> Christian Jung
>
> PS I'm sorry but I have to use a mailer which has some handicaps. If the
> whitespaces of the patches are munged in any way I can send you the
> patches
> as
> attachment.
>
> Another thing: When shutting down a bond (e.g. ifconfig bond0 0.0.0.0
> down)
> the
> slaves keep the master IP address of the bond. Is there a special reason
> for
> this behaviour?
>
> phone: +49 6898/10-4987
> fax: +49 6898/10-54987
> http://www.saarstahl.de
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
The problem still seems to exist in the latest kernel as of today (2.6.30.1).
Why has your patch not been integrated by the kernel community?
--
View this message in context: http://www.nabble.com/bonding%3A-bug-in-balance-alb-mode-%28incorrect-update-ARP-replies%29-tp8527082p24573842.html
Sent from the netdev mailing list archive at Nabble.com.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-07-20 17:12 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-01-23 17:52 bonding: bug in balance-alb mode (incorrect update-ARP-replies) JUNG, Christian
2007-01-25 1:27 ` Jay Vosburgh
2009-07-20 17:12 ` Franck Chionna
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).