* [PATCH 2/4] bonding: make sure tx and rx hash tables stay in sync when using alb mode
@ 2009-09-11 21:11 Andy Gospodarek
2009-09-16 23:36 ` Jay Vosburgh
0 siblings, 1 reply; 8+ messages in thread
From: Andy Gospodarek @ 2009-09-11 21:11 UTC (permalink / raw)
To: netdev, fubar, bonding-devel
Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
I noticed that it was easy for alb (mode 6) bonding to get into a state
where the tx hash-table and rx hash-table are out of sync (there is
really nothing to keep them synchronized), and we will transmit traffic
destined for a host on one slave and send ARP frames to the same slave
from another interface using a different source MAC.
There is no compelling reason to do this, so this patch makes sure the
rx hash-table changes whenever the tx hash-table is updated based on
device load. This patch also drops the code that does rlb re-balancing
since the balancing will not be controlled by the tx hash-table based on
transmit load.
Long-term it would be nice to reduce these two tables into one, but
until that is done (as well as some significant re-factoring on the alb
code) they should be kept in sync.
Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
---
drivers/net/bonding/bond_alb.c | 123 ++++++++++++++++------------------------
1 files changed, 49 insertions(+), 74 deletions(-)
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index bcf25c6..a88d0ec 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -111,6 +111,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -124,7 +125,18 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
return hash;
}
-/*********************** tlb specific functions ***************************/
+
+/********************* rlb and tlb lock functions *************************/
+static inline void _lock_rx_hashtbl(struct bonding *bond)
+{
+ spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+}
+
+static inline void _unlock_rx_hashtbl(struct bonding *bond)
+{
+ spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+}
+
static inline void _lock_tx_hashtbl(struct bonding *bond)
{
@@ -136,6 +148,7 @@ static inline void _unlock_tx_hashtbl(struct bonding *bond)
spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
}
+/*********************** tlb specific functions ***************************/
/* Caller must hold tx_hashtbl lock */
static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
{
@@ -296,6 +309,12 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
if (!assigned_slave) {
assigned_slave = tlb_get_best_slave(bond, hash_index);
+ if (bond_info->rlb_enabled) {
+ _lock_rx_hashtbl(bond);
+ rlb_update_rx_table(bond, assigned_slave, hash_index);
+ _unlock_rx_hashtbl(bond);
+ }
+
if (assigned_slave) {
struct tlb_slave_info *slave_info =
&(SLAVE_TLB_INFO(assigned_slave));
@@ -325,14 +344,37 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
}
/*********************** rlb specific functions ***************************/
-static inline void _lock_rx_hashtbl(struct bonding *bond)
+
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index)
{
- spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+
+ /* check rlb table and correct it if wrong */
+ if (bond_info->rlb_enabled) {
+ struct rlb_client_info *rx_client_info = &(bond_info->rx_hashtbl[hash_index]);
+
+ /* if the new slave computed by tlb checks doesn't match rlb, stop rlb from using it */
+ if (next_slave && (next_slave != rx_client_info->slave))
+ rx_client_info->slave = next_slave;
+ }
+ return next_slave;
}
-static inline void _unlock_rx_hashtbl(struct bonding *bond)
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index)
{
- spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct slave *next_slave = NULL;
+
+ _lock_tx_hashtbl(bond);
+
+ next_slave = tlb_get_best_slave(bond, hash_index);
+
+ _unlock_tx_hashtbl(bond);
+
+ rlb_update_rx_table(bond, next_slave, hash_index);
+
+ return next_slave;
}
/* when an ARP REPLY is received from a client update its info
@@ -402,38 +444,6 @@ out:
return res;
}
-/* Caller must hold bond lock for read */
-static struct slave *rlb_next_rx_slave(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *rx_slave, *slave, *start_at;
- int i = 0;
-
- if (bond_info->next_rx_slave) {
- start_at = bond_info->next_rx_slave;
- } else {
- start_at = bond->first_slave;
- }
-
- rx_slave = NULL;
-
- bond_for_each_slave_from(bond, slave, i, start_at) {
- if (SLAVE_IS_OK(slave)) {
- if (!rx_slave) {
- rx_slave = slave;
- } else if (slave->speed > rx_slave->speed) {
- rx_slave = slave;
- }
- }
- }
-
- if (rx_slave) {
- bond_info->next_rx_slave = rx_slave->next;
- }
-
- return rx_slave;
-}
-
/* teach the switch the mac of a disabled slave
* on the primary for fault tolerance
*
@@ -475,7 +485,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
for (; index != RLB_NULL_INDEX; index = next_index) {
next_index = rx_hash_table[index].next;
if (rx_hash_table[index].slave == slave) {
- struct slave *assigned_slave = rlb_next_rx_slave(bond);
+ struct slave *assigned_slave = alb_get_best_slave(bond, index);
if (assigned_slave) {
rx_hash_table[index].slave = assigned_slave;
@@ -687,7 +697,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
}
/* assign a new slave */
- assigned_slave = rlb_next_rx_slave(bond);
+ assigned_slave = alb_get_best_slave(bond, hash_index);
if (assigned_slave) {
client_info->ip_src = arp->ip_src;
@@ -771,36 +781,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
return tx_slave;
}
-/* Caller must hold bond lock for read */
-static void rlb_rebalance(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *assigned_slave;
- struct rlb_client_info *client_info;
- int ntt;
- u32 hash_index;
-
- _lock_rx_hashtbl(bond);
-
- ntt = 0;
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
- client_info = &(bond_info->rx_hashtbl[hash_index]);
- assigned_slave = rlb_next_rx_slave(bond);
- if (assigned_slave && (client_info->slave != assigned_slave)) {
- client_info->slave = assigned_slave;
- client_info->ntt = 1;
- ntt = 1;
- }
- }
-
- /* update the team's flag only after the whole iteration */
- if (ntt) {
- bond_info->rx_ntt = 1;
- }
- _unlock_rx_hashtbl(bond);
-}
-
/* Caller must hold rx_hashtbl lock */
static void rlb_init_table_entry(struct rlb_client_info *entry)
{
@@ -1521,11 +1501,6 @@ void bond_alb_monitor(struct work_struct *work)
read_lock(&bond->lock);
}
- if (bond_info->rlb_rebalance) {
- bond_info->rlb_rebalance = 0;
- rlb_rebalance(bond);
- }
-
/* check if clients need updating */
if (bond_info->rx_ntt) {
if (bond_info->rlb_update_delay_counter) {
--
1.5.5.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/4] bonding: make sure tx and rx hash tables stay in sync when using alb mode
2009-09-11 21:11 [PATCH 2/4] bonding: make sure tx and rx hash tables stay in sync when using alb mode Andy Gospodarek
@ 2009-09-16 23:36 ` Jay Vosburgh
2009-09-16 23:44 ` Andy Gospodarek
2009-09-18 15:36 ` Andy Gospodarek
0 siblings, 2 replies; 8+ messages in thread
From: Jay Vosburgh @ 2009-09-16 23:36 UTC (permalink / raw)
To: Andy Gospodarek; +Cc: netdev, bonding-devel
Andy Gospodarek <andy@greyhouse.net> wrote:
>
>Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
When testing this, I'm getting a lockdep warning. It appears to
be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
acquires the locks in the other order. I applied all four patches, but
it looks like the change that trips lockdep is in this patch (#2).
I haven't gotten an actual deadlock from this, although it seems
plausible if there are two cpus in bond_alb_xmit at the same time, and
one of them is sending an ARP.
One fairly straightforward fix would be to combine the rx and tx
hash table locks into a single lock. I suspect that wouldn't have any
real performance penalty, since the rx hash table lock is generally not
acquired very often (unlike the tx lock, which is taken for every packet
that goes out).
Also, FYI, two of the four patches had trailing whitespace. I
believe it was #2 and #4.
Thoughts?
Here's the lockdep warning:
Ethernet Channel Bonding Driver: v3.5.0 (November 4, 2008)
bonding: In ALB mode you might experience client disconnections upon reconnection of a link if the bonding module updelay parameter (0 msec) is incompatible with the forwarding delay time of the switch
bonding: MII link monitoring set to 10 ms
ADDRCONF(NETDEV_UP): bond0: link is not ready
tg3 0000:01:07.1: PME# disabled
bonding: bond0: enslaving eth0 as an active interface with a down link.
tg3 0000:01:07.0: PME# enabled
tg3 0000:01:07.0: PME# disabled
bonding: bond0: enslaving eth1 as an active interface with a down link.
tg3: eth0: Link is up at 1000 Mbps, full duplex.
tg3: eth0: Flow control is off for TX and off for RX.
bonding: bond0: link status definitely up for interface eth0.
bonding: bond0: making interface eth0 the new active one.
bonding: bond0: first active interface up!
ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
tg3: eth1: Link is up at 1000 Mbps, full duplex.
tg3: eth1: Flow control is off for TX and off for RX.
bonding: bond0: link status definitely up for interface eth1.
bonding: bond0: enslaving eth2 as an active interface with a down link.
e1000: eth2 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX
bonding: bond0: link status definitely up for interface eth2.
=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.31-locking #10
-------------------------------------------------------
swapper/0 is trying to acquire lock:
(&(bond_info->tx_hashtbl_lock)){+.-...}, at: [<e1863ec1>] alb_get_best_slave+0x1b/0x58 [bonding]
but task is already holding lock:
(&(bond_info->rx_hashtbl_lock)){+.-...}, at: [<e1864fe5>] bond_alb_xmit+0x1b7/0x60b [bonding]
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&(bond_info->rx_hashtbl_lock)){+.-...}:
[<c014fcbb>] __lock_acquire+0x109f/0x13a0
[<c0150064>] lock_acquire+0xa8/0xbf
[<c0343678>] _spin_lock_bh+0x2a/0x39
[<e186532f>] bond_alb_xmit+0x501/0x60b [bonding]
[<e1861a6b>] bond_start_xmit+0x2e9/0x32e [bonding]
[<c02dc369>] dev_hard_start_xmit+0x281/0x314
[<c02dc81a>] dev_queue_xmit+0x338/0x41b
[<c02e1ddc>] neigh_resolve_output+0x260/0x28d
[<e170dece>] ip6_output2+0x2dc/0x32a [ipv6]
[<e170edaf>] ip6_output+0xe93/0xea0 [ipv6]
[<e171be2e>] ndisc_send_skb+0x19d/0x320 [ipv6]
[<e171bfeb>] __ndisc_send+0x3a/0x45 [ipv6]
[<e171d3df>] ndisc_send_rs+0x34/0x3c [ipv6]
[<e171127c>] addrconf_dad_completed+0x5e/0x99 [ipv6]
[<e17120df>] addrconf_dad_timer+0x5d/0xe1 [ipv6]
[<c0136692>] run_timer_softirq+0x1a0/0x219
[<c0132cd9>] __do_softirq+0xd6/0x1a3
[<c0132dd1>] do_softirq+0x2b/0x43
[<c0132f4e>] irq_exit+0x38/0x74
[<c0113669>] smp_apic_timer_interrupt+0x6e/0x7c
[<c01032fb>] apic_timer_interrupt+0x2f/0x34
[<c0101b43>] cpu_idle+0x49/0x76
[<c03335f3>] rest_init+0x67/0x69
[<c04937cd>] start_kernel+0x2c1/0x2c6
[<c049306a>] __init_begin+0x6a/0x6f
-> #0 (&(bond_info->tx_hashtbl_lock)){+.-...}:
[<c014fa46>] __lock_acquire+0xe2a/0x13a0
[<c0150064>] lock_acquire+0xa8/0xbf
[<c0343678>] _spin_lock_bh+0x2a/0x39
[<e1863ec1>] alb_get_best_slave+0x1b/0x58 [bonding]
[<e1865080>] bond_alb_xmit+0x252/0x60b [bonding]
[<e1861a6b>] bond_start_xmit+0x2e9/0x32e [bonding]
[<c02dc369>] dev_hard_start_xmit+0x281/0x314
[<c02dc81a>] dev_queue_xmit+0x338/0x41b
[<c03157a8>] arp_send+0x32/0x37
[<c0316033>] arp_solicit+0x18a/0x1a1
[<c02e3ce3>] neigh_timer_handler+0x1c9/0x20a
[<c0136692>] run_timer_softirq+0x1a0/0x219
[<c0132cd9>] __do_softirq+0xd6/0x1a3
[<c0132dd1>] do_softirq+0x2b/0x43
[<c0132f4e>] irq_exit+0x38/0x74
[<c0113669>] smp_apic_timer_interrupt+0x6e/0x7c
[<c01032fb>] apic_timer_interrupt+0x2f/0x34
[<c0101b43>] cpu_idle+0x49/0x76
[<c033e289>] start_secondary+0x1ab/0x1b2
other info that might help us debug this:
5 locks held by swapper/0:
#0: (&n->timer){+.-...}, at: [<c0136638>] run_timer_softirq+0x146/0x219
#1: (rcu_read_lock){.+.+..}, at: [<c02dc696>] dev_queue_xmit+0x1b4/0x41b
#2: (&bond->lock){++.?..}, at: [<e1864e6b>] bond_alb_xmit+0x3d/0x60b [bonding]
#3: (&bond->curr_slave_lock){++.?..}, at: [<e1864e7e>] bond_alb_xmit+0x50/0x60b [bonding]
#4: (&(bond_info->rx_hashtbl_lock)){+.-...}, at: [<e1864fe5>] bond_alb_xmit+0x1b7/0x60b [bonding]
stack backtrace:
Pid: 0, comm: swapper Not tainted 2.6.31-locking #10
Call Trace:
[<c03408d6>] ? printk+0xf/0x11
[<c014e7d5>] print_circular_bug+0x90/0x9c
[<c014fa46>] __lock_acquire+0xe2a/0x13a0
[<e1864fe5>] ? bond_alb_xmit+0x1b7/0x60b [bonding]
[<c0150064>] lock_acquire+0xa8/0xbf
[<e1863ec1>] ? alb_get_best_slave+0x1b/0x58 [bonding]
[<c0343678>] _spin_lock_bh+0x2a/0x39
[<e1863ec1>] ? alb_get_best_slave+0x1b/0x58 [bonding]
[<e1863ec1>] alb_get_best_slave+0x1b/0x58 [bonding]
[<e1865080>] bond_alb_xmit+0x252/0x60b [bonding]
[<c014dff7>] ? mark_held_locks+0x43/0x5b
[<c014e211>] ? trace_hardirqs_on_caller+0xe6/0x120
[<e1861a6b>] bond_start_xmit+0x2e9/0x32e [bonding]
[<c02dc369>] dev_hard_start_xmit+0x281/0x314
[<c02dc81a>] dev_queue_xmit+0x338/0x41b
[<c031579c>] ? arp_send+0x26/0x37
[<c03157a8>] arp_send+0x32/0x37
[<c0316033>] arp_solicit+0x18a/0x1a1
[<c02e3ce3>] neigh_timer_handler+0x1c9/0x20a
[<c0136692>] run_timer_softirq+0x1a0/0x219
[<c0136638>] ? run_timer_softirq+0x146/0x219
[<c02e3b1a>] ? neigh_timer_handler+0x0/0x20a
[<c0132cd9>] __do_softirq+0xd6/0x1a3
[<c0132dd1>] do_softirq+0x2b/0x43
[<c0132f4e>] irq_exit+0x38/0x74
[<c0113669>] smp_apic_timer_interrupt+0x6e/0x7c
[<c01032fb>] apic_timer_interrupt+0x2f/0x34
[<c0101b3d>] ? cpu_idle+0x43/0x76
[<c014007b>] ? sys_timer_create+0x205/0x304
[<c0108125>] ? default_idle+0x8a/0xef
[<c014b25c>] ? tick_nohz_restart_sched_tick+0x12f/0x138
[<c0101b43>] cpu_idle+0x49/0x76
[<c033e289>] start_secondary+0x1ab/0x1b2
-J
---
-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/4] bonding: make sure tx and rx hash tables stay in sync when using alb mode
2009-09-16 23:36 ` Jay Vosburgh
@ 2009-09-16 23:44 ` Andy Gospodarek
2009-09-18 15:36 ` Andy Gospodarek
1 sibling, 0 replies; 8+ messages in thread
From: Andy Gospodarek @ 2009-09-16 23:44 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: Andy Gospodarek, netdev, bonding-devel
On Wed, Sep 16, 2009 at 04:36:09PM -0700, Jay Vosburgh wrote:
> Andy Gospodarek <andy@greyhouse.net> wrote:
>
> >
> >Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
>
> When testing this, I'm getting a lockdep warning. It appears to
> be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
> in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
> acquires the locks in the other order. I applied all four patches, but
> it looks like the change that trips lockdep is in this patch (#2).
Interesting. I specifically enabled lockdep (or so I thought) because I
wanted to be sure by more than my inspection that there were no deadlock
possibilities.
>
> I haven't gotten an actual deadlock from this, although it seems
> plausible if there are two cpus in bond_alb_xmit at the same time, and
> one of them is sending an ARP.
>
> One fairly straightforward fix would be to combine the rx and tx
> hash table locks into a single lock. I suspect that wouldn't have any
> real performance penalty, since the rx hash table lock is generally not
> acquired very often (unlike the tx lock, which is taken for every packet
> that goes out).
>
That will probably work. I'll take a look at this right away and see
how feasible that is.
> Also, FYI, two of the four patches had trailing whitespace. I
> believe it was #2 and #4.
Grrr, I can't believe I did that. :-/
>
> Thoughts?
>
> Here's the lockdep warning:
>
> Ethernet Channel Bonding Driver: v3.5.0 (November 4, 2008)
> bonding: In ALB mode you might experience client disconnections upon reconnection of a link if the bonding module updelay parameter (0 msec) is incompatible with the forwarding delay time of the switch
> bonding: MII link monitoring set to 10 ms
> ADDRCONF(NETDEV_UP): bond0: link is not ready
> tg3 0000:01:07.1: PME# disabled
> bonding: bond0: enslaving eth0 as an active interface with a down link.
> tg3 0000:01:07.0: PME# enabled
> tg3 0000:01:07.0: PME# disabled
> bonding: bond0: enslaving eth1 as an active interface with a down link.
> tg3: eth0: Link is up at 1000 Mbps, full duplex.
> tg3: eth0: Flow control is off for TX and off for RX.
> bonding: bond0: link status definitely up for interface eth0.
> bonding: bond0: making interface eth0 the new active one.
> bonding: bond0: first active interface up!
> ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
> tg3: eth1: Link is up at 1000 Mbps, full duplex.
> tg3: eth1: Flow control is off for TX and off for RX.
> bonding: bond0: link status definitely up for interface eth1.
> bonding: bond0: enslaving eth2 as an active interface with a down link.
> e1000: eth2 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX
> bonding: bond0: link status definitely up for interface eth2.
>
> =======================================================
> [ INFO: possible circular locking dependency detected ]
> 2.6.31-locking #10
> -------------------------------------------------------
> swapper/0 is trying to acquire lock:
> (&(bond_info->tx_hashtbl_lock)){+.-...}, at: [<e1863ec1>] alb_get_best_slave+0x1b/0x58 [bonding]
>
> but task is already holding lock:
> (&(bond_info->rx_hashtbl_lock)){+.-...}, at: [<e1864fe5>] bond_alb_xmit+0x1b7/0x60b [bonding]
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #1 (&(bond_info->rx_hashtbl_lock)){+.-...}:
> [<c014fcbb>] __lock_acquire+0x109f/0x13a0
> [<c0150064>] lock_acquire+0xa8/0xbf
> [<c0343678>] _spin_lock_bh+0x2a/0x39
> [<e186532f>] bond_alb_xmit+0x501/0x60b [bonding]
> [<e1861a6b>] bond_start_xmit+0x2e9/0x32e [bonding]
> [<c02dc369>] dev_hard_start_xmit+0x281/0x314
> [<c02dc81a>] dev_queue_xmit+0x338/0x41b
> [<c02e1ddc>] neigh_resolve_output+0x260/0x28d
> [<e170dece>] ip6_output2+0x2dc/0x32a [ipv6]
> [<e170edaf>] ip6_output+0xe93/0xea0 [ipv6]
> [<e171be2e>] ndisc_send_skb+0x19d/0x320 [ipv6]
> [<e171bfeb>] __ndisc_send+0x3a/0x45 [ipv6]
> [<e171d3df>] ndisc_send_rs+0x34/0x3c [ipv6]
> [<e171127c>] addrconf_dad_completed+0x5e/0x99 [ipv6]
> [<e17120df>] addrconf_dad_timer+0x5d/0xe1 [ipv6]
> [<c0136692>] run_timer_softirq+0x1a0/0x219
> [<c0132cd9>] __do_softirq+0xd6/0x1a3
> [<c0132dd1>] do_softirq+0x2b/0x43
> [<c0132f4e>] irq_exit+0x38/0x74
> [<c0113669>] smp_apic_timer_interrupt+0x6e/0x7c
> [<c01032fb>] apic_timer_interrupt+0x2f/0x34
> [<c0101b43>] cpu_idle+0x49/0x76
> [<c03335f3>] rest_init+0x67/0x69
> [<c04937cd>] start_kernel+0x2c1/0x2c6
> [<c049306a>] __init_begin+0x6a/0x6f
>
> -> #0 (&(bond_info->tx_hashtbl_lock)){+.-...}:
> [<c014fa46>] __lock_acquire+0xe2a/0x13a0
> [<c0150064>] lock_acquire+0xa8/0xbf
> [<c0343678>] _spin_lock_bh+0x2a/0x39
> [<e1863ec1>] alb_get_best_slave+0x1b/0x58 [bonding]
> [<e1865080>] bond_alb_xmit+0x252/0x60b [bonding]
> [<e1861a6b>] bond_start_xmit+0x2e9/0x32e [bonding]
> [<c02dc369>] dev_hard_start_xmit+0x281/0x314
> [<c02dc81a>] dev_queue_xmit+0x338/0x41b
> [<c03157a8>] arp_send+0x32/0x37
> [<c0316033>] arp_solicit+0x18a/0x1a1
> [<c02e3ce3>] neigh_timer_handler+0x1c9/0x20a
> [<c0136692>] run_timer_softirq+0x1a0/0x219
> [<c0132cd9>] __do_softirq+0xd6/0x1a3
> [<c0132dd1>] do_softirq+0x2b/0x43
> [<c0132f4e>] irq_exit+0x38/0x74
> [<c0113669>] smp_apic_timer_interrupt+0x6e/0x7c
> [<c01032fb>] apic_timer_interrupt+0x2f/0x34
> [<c0101b43>] cpu_idle+0x49/0x76
> [<c033e289>] start_secondary+0x1ab/0x1b2
>
> other info that might help us debug this:
>
> 5 locks held by swapper/0:
> #0: (&n->timer){+.-...}, at: [<c0136638>] run_timer_softirq+0x146/0x219
> #1: (rcu_read_lock){.+.+..}, at: [<c02dc696>] dev_queue_xmit+0x1b4/0x41b
> #2: (&bond->lock){++.?..}, at: [<e1864e6b>] bond_alb_xmit+0x3d/0x60b [bonding]
> #3: (&bond->curr_slave_lock){++.?..}, at: [<e1864e7e>] bond_alb_xmit+0x50/0x60b [bonding]
> #4: (&(bond_info->rx_hashtbl_lock)){+.-...}, at: [<e1864fe5>] bond_alb_xmit+0x1b7/0x60b [bonding]
>
> stack backtrace:
> Pid: 0, comm: swapper Not tainted 2.6.31-locking #10
> Call Trace:
> [<c03408d6>] ? printk+0xf/0x11
> [<c014e7d5>] print_circular_bug+0x90/0x9c
> [<c014fa46>] __lock_acquire+0xe2a/0x13a0
> [<e1864fe5>] ? bond_alb_xmit+0x1b7/0x60b [bonding]
> [<c0150064>] lock_acquire+0xa8/0xbf
> [<e1863ec1>] ? alb_get_best_slave+0x1b/0x58 [bonding]
> [<c0343678>] _spin_lock_bh+0x2a/0x39
> [<e1863ec1>] ? alb_get_best_slave+0x1b/0x58 [bonding]
> [<e1863ec1>] alb_get_best_slave+0x1b/0x58 [bonding]
> [<e1865080>] bond_alb_xmit+0x252/0x60b [bonding]
> [<c014dff7>] ? mark_held_locks+0x43/0x5b
> [<c014e211>] ? trace_hardirqs_on_caller+0xe6/0x120
> [<e1861a6b>] bond_start_xmit+0x2e9/0x32e [bonding]
> [<c02dc369>] dev_hard_start_xmit+0x281/0x314
> [<c02dc81a>] dev_queue_xmit+0x338/0x41b
> [<c031579c>] ? arp_send+0x26/0x37
> [<c03157a8>] arp_send+0x32/0x37
> [<c0316033>] arp_solicit+0x18a/0x1a1
> [<c02e3ce3>] neigh_timer_handler+0x1c9/0x20a
> [<c0136692>] run_timer_softirq+0x1a0/0x219
> [<c0136638>] ? run_timer_softirq+0x146/0x219
> [<c02e3b1a>] ? neigh_timer_handler+0x0/0x20a
> [<c0132cd9>] __do_softirq+0xd6/0x1a3
> [<c0132dd1>] do_softirq+0x2b/0x43
> [<c0132f4e>] irq_exit+0x38/0x74
> [<c0113669>] smp_apic_timer_interrupt+0x6e/0x7c
> [<c01032fb>] apic_timer_interrupt+0x2f/0x34
> [<c0101b3d>] ? cpu_idle+0x43/0x76
> [<c014007b>] ? sys_timer_create+0x205/0x304
> [<c0108125>] ? default_idle+0x8a/0xef
> [<c014b25c>] ? tick_nohz_restart_sched_tick+0x12f/0x138
> [<c0101b43>] cpu_idle+0x49/0x76
> [<c033e289>] start_secondary+0x1ab/0x1b2
>
>
> -J
>
> ---
> -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/4] bonding: make sure tx and rx hash tables stay in sync when using alb mode
2009-09-16 23:36 ` Jay Vosburgh
2009-09-16 23:44 ` Andy Gospodarek
@ 2009-09-18 15:36 ` Andy Gospodarek
2009-09-18 15:56 ` [PATCH 2/4 v3] " Andy Gospodarek
1 sibling, 1 reply; 8+ messages in thread
From: Andy Gospodarek @ 2009-09-18 15:36 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: netdev, bonding-devel
On Wed, Sep 16, 2009 at 04:36:09PM -0700, Jay Vosburgh wrote:
> Andy Gospodarek <andy@greyhouse.net> wrote:
>
> >
> >Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
>
> When testing this, I'm getting a lockdep warning. It appears to
> be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
> in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
> acquires the locks in the other order. I applied all four patches, but
> it looks like the change that trips lockdep is in this patch (#2).
>
> I haven't gotten an actual deadlock from this, although it seems
> plausible if there are two cpus in bond_alb_xmit at the same time, and
> one of them is sending an ARP.
>
> One fairly straightforward fix would be to combine the rx and tx
> hash table locks into a single lock. I suspect that wouldn't have any
> real performance penalty, since the rx hash table lock is generally not
> acquired very often (unlike the tx lock, which is taken for every packet
> that goes out).
>
> Also, FYI, two of the four patches had trailing whitespace. I
> believe it was #2 and #4.
>
> Thoughts?
Jay,
This patch should address both the deadlock and whitespace concerns.
I ran a kernel with LOCKDEP enabled and saw no warnings while passing
traffic on the bond while pulling cables and while removing the module.
Here it is....
[PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
I noticed that it was easy for alb (mode 6) bonding to get into a state
where the tx hash-table and rx hash-table are out of sync (there is
really nothing to keep them synchronized), and we will transmit traffic
destined for a host on one slave and send ARP frames to the same slave
from another interface using a different source MAC.
There is no compelling reason to do this, so this patch makes sure the
rx hash-table changes whenever the tx hash-table is updated based on
device load. This patch also drops the code that does rlb re-balancing
since the balancing will not be controlled by the tx hash-table based on
transmit load. In order to address an issue found with the initial
patch, I have also combined the rx and tx hash table lock into a single
lock. This will facilitate moving these into a single table at some
point.
---
drivers/net/bonding/bond_alb.c | 203 +++++++++++++++-------------------------
drivers/net/bonding/bond_alb.h | 3 +-
2 files changed, 75 insertions(+), 131 deletions(-)
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index bcf25c6..04b7055 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -111,6 +111,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -124,18 +125,18 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
return hash;
}
-/*********************** tlb specific functions ***************************/
-
-static inline void _lock_tx_hashtbl(struct bonding *bond)
+/********************* hash table lock functions *************************/
+static inline void _lock_hashtbl(struct bonding *bond)
{
- spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+ spin_lock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
}
-static inline void _unlock_tx_hashtbl(struct bonding *bond)
+static inline void _unlock_hashtbl(struct bonding *bond)
{
- spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+ spin_unlock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
}
+/*********************** tlb specific functions ***************************/
/* Caller must hold tx_hashtbl lock */
static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
{
@@ -163,7 +164,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
struct tlb_client_info *tx_hash_table;
u32 index;
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
/* clear slave from tx_hashtbl */
tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
@@ -180,7 +181,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
tlb_init_slave(slave);
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Must be called before starting the monitor timer */
@@ -191,7 +192,7 @@ static int tlb_initialize(struct bonding *bond)
struct tlb_client_info *new_hashtbl;
int i;
- spin_lock_init(&(bond_info->tx_hashtbl_lock));
+ spin_lock_init(&(bond_info->hashtbl_lock));
new_hashtbl = kzalloc(size, GFP_KERNEL);
if (!new_hashtbl) {
@@ -200,7 +201,7 @@ static int tlb_initialize(struct bonding *bond)
bond->dev->name);
return -1;
}
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
bond_info->tx_hashtbl = new_hashtbl;
@@ -208,7 +209,7 @@ static int tlb_initialize(struct bonding *bond)
tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
}
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return 0;
}
@@ -218,12 +219,12 @@ static void tlb_deinitialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
kfree(bond_info->tx_hashtbl);
bond_info->tx_hashtbl = NULL;
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Caller must hold bond lock for read */
@@ -264,24 +265,6 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
return least_loaded;
}
-/* Caller must hold bond lock for read and hashtbl lock */
-static struct slave *tlb_get_best_slave(struct bonding *bond, u32 hash_index)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
- struct slave *last_slave = tx_hash_table[hash_index].last_slave;
- struct slave *next_slave = NULL;
-
- if (last_slave && SLAVE_IS_OK(last_slave)) {
- /* Use the last slave listed in the tx hashtbl if:
- the last slave currently is essentially unloaded. */
- if (SLAVE_TLB_INFO(last_slave).load < 10)
- next_slave = last_slave;
- }
-
- return next_slave ? next_slave : tlb_get_least_loaded_slave(bond);
-}
-
/* Caller must hold bond lock for read */
static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
{
@@ -289,13 +272,12 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
struct tlb_client_info *hash_table;
struct slave *assigned_slave;
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_table = bond_info->tx_hashtbl;
assigned_slave = hash_table[hash_index].tx_slave;
if (!assigned_slave) {
- assigned_slave = tlb_get_best_slave(bond, hash_index);
-
+ assigned_slave = alb_get_best_slave(bond, hash_index);
if (assigned_slave) {
struct tlb_slave_info *slave_info =
&(SLAVE_TLB_INFO(assigned_slave));
@@ -319,20 +301,52 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
hash_table[hash_index].tx_bytes += skb_len;
}
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
/*********************** rlb specific functions ***************************/
-static inline void _lock_rx_hashtbl(struct bonding *bond)
+
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index)
{
- spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+
+ /* check rlb table and correct it if wrong */
+ if (bond_info->rlb_enabled) {
+ struct rlb_client_info *rx_client_info = &(bond_info->rx_hashtbl[hash_index]);
+
+ /* if the new slave computed by tlb checks doesn't match rlb, stop rlb from using it */
+ if (next_slave && (next_slave != rx_client_info->slave))
+ rx_client_info->slave = next_slave;
+ }
+ return next_slave;
}
-static inline void _unlock_rx_hashtbl(struct bonding *bond)
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index)
{
- spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
+ struct slave *last_slave = tx_hash_table[hash_index].last_slave;
+ struct slave *next_slave = NULL;
+
+ /* presume the next slave will be the least loaded one */
+ next_slave = tlb_get_least_loaded_slave(bond);
+
+ if (last_slave && SLAVE_IS_OK(last_slave)) {
+ /* Use the last slave listed in the tx hashtbl if:
+ the last slave currently is essentially unloaded. */
+ if (SLAVE_TLB_INFO(last_slave).load < 10)
+ next_slave = last_slave;
+ }
+
+ /* update the rlb hashtbl if there was a previous entry */
+ if (bond_info->rlb_enabled)
+ rlb_update_rx_table(bond, next_slave, hash_index);
+
+ return next_slave;
}
/* when an ARP REPLY is received from a client update its info
@@ -344,7 +358,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -358,7 +372,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
bond_info->rx_ntt = 1;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev)
@@ -402,38 +416,6 @@ out:
return res;
}
-/* Caller must hold bond lock for read */
-static struct slave *rlb_next_rx_slave(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *rx_slave, *slave, *start_at;
- int i = 0;
-
- if (bond_info->next_rx_slave) {
- start_at = bond_info->next_rx_slave;
- } else {
- start_at = bond->first_slave;
- }
-
- rx_slave = NULL;
-
- bond_for_each_slave_from(bond, slave, i, start_at) {
- if (SLAVE_IS_OK(slave)) {
- if (!rx_slave) {
- rx_slave = slave;
- } else if (slave->speed > rx_slave->speed) {
- rx_slave = slave;
- }
- }
- }
-
- if (rx_slave) {
- bond_info->next_rx_slave = rx_slave->next;
- }
-
- return rx_slave;
-}
-
/* teach the switch the mac of a disabled slave
* on the primary for fault tolerance
*
@@ -468,14 +450,14 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
u32 index, next_index;
/* clear slave from rx_hashtbl */
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
rx_hash_table = bond_info->rx_hashtbl;
index = bond_info->rx_hashtbl_head;
for (; index != RLB_NULL_INDEX; index = next_index) {
next_index = rx_hash_table[index].next;
if (rx_hash_table[index].slave == slave) {
- struct slave *assigned_slave = rlb_next_rx_slave(bond);
+ struct slave *assigned_slave = alb_get_best_slave(bond, index);
if (assigned_slave) {
rx_hash_table[index].slave = assigned_slave;
@@ -499,7 +481,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
write_lock_bh(&bond->curr_slave_lock);
@@ -558,7 +540,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -576,7 +558,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
*/
bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* The slave was assigned a new mac address - update the clients */
@@ -587,7 +569,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
int ntt = 0;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -607,7 +589,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* mark all clients using src_ip to be updated */
@@ -617,7 +599,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -643,7 +625,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Caller must hold both bond and ptr locks for read */
@@ -655,7 +637,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
struct rlb_client_info *client_info;
u32 hash_index = 0;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -671,7 +653,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
assigned_slave = client_info->slave;
if (assigned_slave) {
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
} else {
@@ -687,7 +669,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
}
/* assign a new slave */
- assigned_slave = rlb_next_rx_slave(bond);
+ assigned_slave = alb_get_best_slave(bond, hash_index);
if (assigned_slave) {
client_info->ip_src = arp->ip_src;
@@ -723,7 +705,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
@@ -771,36 +753,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
return tx_slave;
}
-/* Caller must hold bond lock for read */
-static void rlb_rebalance(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *assigned_slave;
- struct rlb_client_info *client_info;
- int ntt;
- u32 hash_index;
-
- _lock_rx_hashtbl(bond);
-
- ntt = 0;
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
- client_info = &(bond_info->rx_hashtbl[hash_index]);
- assigned_slave = rlb_next_rx_slave(bond);
- if (assigned_slave && (client_info->slave != assigned_slave)) {
- client_info->slave = assigned_slave;
- client_info->ntt = 1;
- ntt = 1;
- }
- }
-
- /* update the team's flag only after the whole iteration */
- if (ntt) {
- bond_info->rx_ntt = 1;
- }
- _unlock_rx_hashtbl(bond);
-}
-
/* Caller must hold rx_hashtbl lock */
static void rlb_init_table_entry(struct rlb_client_info *entry)
{
@@ -817,8 +769,6 @@ static int rlb_initialize(struct bonding *bond)
int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
int i;
- spin_lock_init(&(bond_info->rx_hashtbl_lock));
-
new_hashtbl = kmalloc(size, GFP_KERNEL);
if (!new_hashtbl) {
printk(KERN_ERR DRV_NAME
@@ -826,7 +776,7 @@ static int rlb_initialize(struct bonding *bond)
bond->dev->name);
return -1;
}
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
bond_info->rx_hashtbl = new_hashtbl;
@@ -836,7 +786,7 @@ static int rlb_initialize(struct bonding *bond)
rlb_init_table_entry(bond_info->rx_hashtbl + i);
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
/*initialize packet type*/
pk_type->type = cpu_to_be16(ETH_P_ARP);
@@ -855,13 +805,13 @@ static void rlb_deinitialize(struct bonding *bond)
dev_remove_pack(&(bond_info->rlb_pkt_type));
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
kfree(bond_info->rx_hashtbl);
bond_info->rx_hashtbl = NULL;
bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
@@ -869,7 +819,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
u32 curr_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
curr_index = bond_info->rx_hashtbl_head;
while (curr_index != RLB_NULL_INDEX) {
@@ -894,7 +844,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
curr_index = next_index;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/*********************** tlb/rlb shared functions *********************/
@@ -1521,11 +1471,6 @@ void bond_alb_monitor(struct work_struct *work)
read_lock(&bond->lock);
}
- if (bond_info->rlb_rebalance) {
- bond_info->rlb_rebalance = 0;
- rlb_rebalance(bond);
- }
-
/* check if clients need updating */
if (bond_info->rx_ntt) {
if (bond_info->rlb_update_delay_counter) {
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index b65fd29..09d755a 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -90,7 +90,7 @@ struct tlb_slave_info {
struct alb_bond_info {
struct timer_list alb_timer;
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
- spinlock_t tx_hashtbl_lock;
+ spinlock_t hashtbl_lock; /* lock for both tables */
u32 unbalanced_load;
int tx_rebalance_counter;
int lp_counter;
@@ -98,7 +98,6 @@ struct alb_bond_info {
int rlb_enabled;
struct packet_type rlb_pkt_type;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
- spinlock_t rx_hashtbl_lock;
u32 rx_hashtbl_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
--
1.5.5.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/4 v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
2009-09-18 15:36 ` Andy Gospodarek
@ 2009-09-18 15:56 ` Andy Gospodarek
2009-09-28 22:00 ` Andy Gospodarek
0 siblings, 1 reply; 8+ messages in thread
From: Andy Gospodarek @ 2009-09-18 15:56 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: netdev, bonding-devel
On Fri, Sep 18, 2009 at 11:36:22AM -0400, Andy Gospodarek wrote:
> On Wed, Sep 16, 2009 at 04:36:09PM -0700, Jay Vosburgh wrote:
> > Andy Gospodarek <andy@greyhouse.net> wrote:
> >
> > >
> > >Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
> >
> > When testing this, I'm getting a lockdep warning. It appears to
> > be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
> > in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
> > acquires the locks in the other order. I applied all four patches, but
> > it looks like the change that trips lockdep is in this patch (#2).
> >
> > I haven't gotten an actual deadlock from this, although it seems
> > plausible if there are two cpus in bond_alb_xmit at the same time, and
> > one of them is sending an ARP.
> >
> > One fairly straightforward fix would be to combine the rx and tx
> > hash table locks into a single lock. I suspect that wouldn't have any
> > real performance penalty, since the rx hash table lock is generally not
> > acquired very often (unlike the tx lock, which is taken for every packet
> > that goes out).
> >
> > Also, FYI, two of the four patches had trailing whitespace. I
> > believe it was #2 and #4.
> >
> > Thoughts?
>
> Jay,
>
> This patch should address both the deadlock and whitespace concerns.
> I ran a kernel with LOCKDEP enabled and saw no warnings while passing
> traffic on the bond while pulling cables and while removing the module.
> Here it is....
>
Adding the version and signed-off-by lines might be nice, eh?
[PATCH v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
I noticed that it was easy for alb (mode 6) bonding to get into a state
where the tx hash-table and rx hash-table are out of sync (there is
really nothing to keep them synchronized), and we will transmit traffic
destined for a host on one slave and send ARP frames to the same slave
from another interface using a different source MAC.
There is no compelling reason to do this, so this patch makes sure the
rx hash-table changes whenever the tx hash-table is updated based on
device load. This patch also drops the code that does rlb re-balancing
since the balancing will now be controlled by the tx hash-table based on
transmit load. In order to address an issue found with the initial
patch, I have also combined the rx and tx hash table lock into a single
lock. This will facilitate moving these into a single table at some
point.
Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
---
drivers/net/bonding/bond_alb.c | 203 +++++++++++++++-------------------------
drivers/net/bonding/bond_alb.h | 3 +-
2 files changed, 75 insertions(+), 131 deletions(-)
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index bcf25c6..04b7055 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -111,6 +111,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -124,18 +125,18 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
return hash;
}
-/*********************** tlb specific functions ***************************/
-
-static inline void _lock_tx_hashtbl(struct bonding *bond)
+/********************* hash table lock functions *************************/
+static inline void _lock_hashtbl(struct bonding *bond)
{
- spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+ spin_lock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
}
-static inline void _unlock_tx_hashtbl(struct bonding *bond)
+static inline void _unlock_hashtbl(struct bonding *bond)
{
- spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+ spin_unlock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
}
+/*********************** tlb specific functions ***************************/
/* Caller must hold tx_hashtbl lock */
static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
{
@@ -163,7 +164,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
struct tlb_client_info *tx_hash_table;
u32 index;
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
/* clear slave from tx_hashtbl */
tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
@@ -180,7 +181,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
tlb_init_slave(slave);
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Must be called before starting the monitor timer */
@@ -191,7 +192,7 @@ static int tlb_initialize(struct bonding *bond)
struct tlb_client_info *new_hashtbl;
int i;
- spin_lock_init(&(bond_info->tx_hashtbl_lock));
+ spin_lock_init(&(bond_info->hashtbl_lock));
new_hashtbl = kzalloc(size, GFP_KERNEL);
if (!new_hashtbl) {
@@ -200,7 +201,7 @@ static int tlb_initialize(struct bonding *bond)
bond->dev->name);
return -1;
}
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
bond_info->tx_hashtbl = new_hashtbl;
@@ -208,7 +209,7 @@ static int tlb_initialize(struct bonding *bond)
tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
}
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return 0;
}
@@ -218,12 +219,12 @@ static void tlb_deinitialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
kfree(bond_info->tx_hashtbl);
bond_info->tx_hashtbl = NULL;
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Caller must hold bond lock for read */
@@ -264,24 +265,6 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
return least_loaded;
}
-/* Caller must hold bond lock for read and hashtbl lock */
-static struct slave *tlb_get_best_slave(struct bonding *bond, u32 hash_index)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
- struct slave *last_slave = tx_hash_table[hash_index].last_slave;
- struct slave *next_slave = NULL;
-
- if (last_slave && SLAVE_IS_OK(last_slave)) {
- /* Use the last slave listed in the tx hashtbl if:
- the last slave currently is essentially unloaded. */
- if (SLAVE_TLB_INFO(last_slave).load < 10)
- next_slave = last_slave;
- }
-
- return next_slave ? next_slave : tlb_get_least_loaded_slave(bond);
-}
-
/* Caller must hold bond lock for read */
static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
{
@@ -289,13 +272,12 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
struct tlb_client_info *hash_table;
struct slave *assigned_slave;
- _lock_tx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_table = bond_info->tx_hashtbl;
assigned_slave = hash_table[hash_index].tx_slave;
if (!assigned_slave) {
- assigned_slave = tlb_get_best_slave(bond, hash_index);
-
+ assigned_slave = alb_get_best_slave(bond, hash_index);
if (assigned_slave) {
struct tlb_slave_info *slave_info =
&(SLAVE_TLB_INFO(assigned_slave));
@@ -319,20 +301,52 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
hash_table[hash_index].tx_bytes += skb_len;
}
- _unlock_tx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
/*********************** rlb specific functions ***************************/
-static inline void _lock_rx_hashtbl(struct bonding *bond)
+
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index)
{
- spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+
+ /* check rlb table and correct it if wrong */
+ if (bond_info->rlb_enabled) {
+ struct rlb_client_info *rx_client_info = &(bond_info->rx_hashtbl[hash_index]);
+
+ /* if the new slave computed by tlb checks doesn't match rlb, stop rlb from using it */
+ if (next_slave && (next_slave != rx_client_info->slave))
+ rx_client_info->slave = next_slave;
+ }
+ return next_slave;
}
-static inline void _unlock_rx_hashtbl(struct bonding *bond)
+/* Caller must hold bond lock for read and hashtbl lock */
+static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index)
{
- spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
+ struct slave *last_slave = tx_hash_table[hash_index].last_slave;
+ struct slave *next_slave = NULL;
+
+ /* presume the next slave will be the least loaded one */
+ next_slave = tlb_get_least_loaded_slave(bond);
+
+ if (last_slave && SLAVE_IS_OK(last_slave)) {
+ /* Use the last slave listed in the tx hashtbl if:
+ the last slave currently is essentially unloaded. */
+ if (SLAVE_TLB_INFO(last_slave).load < 10)
+ next_slave = last_slave;
+ }
+
+ /* update the rlb hashtbl if there was a previous entry */
+ if (bond_info->rlb_enabled)
+ rlb_update_rx_table(bond, next_slave, hash_index);
+
+ return next_slave;
}
/* when an ARP REPLY is received from a client update its info
@@ -344,7 +358,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -358,7 +372,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
bond_info->rx_ntt = 1;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev)
@@ -402,38 +416,6 @@ out:
return res;
}
-/* Caller must hold bond lock for read */
-static struct slave *rlb_next_rx_slave(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *rx_slave, *slave, *start_at;
- int i = 0;
-
- if (bond_info->next_rx_slave) {
- start_at = bond_info->next_rx_slave;
- } else {
- start_at = bond->first_slave;
- }
-
- rx_slave = NULL;
-
- bond_for_each_slave_from(bond, slave, i, start_at) {
- if (SLAVE_IS_OK(slave)) {
- if (!rx_slave) {
- rx_slave = slave;
- } else if (slave->speed > rx_slave->speed) {
- rx_slave = slave;
- }
- }
- }
-
- if (rx_slave) {
- bond_info->next_rx_slave = rx_slave->next;
- }
-
- return rx_slave;
-}
-
/* teach the switch the mac of a disabled slave
* on the primary for fault tolerance
*
@@ -468,14 +450,14 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
u32 index, next_index;
/* clear slave from rx_hashtbl */
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
rx_hash_table = bond_info->rx_hashtbl;
index = bond_info->rx_hashtbl_head;
for (; index != RLB_NULL_INDEX; index = next_index) {
next_index = rx_hash_table[index].next;
if (rx_hash_table[index].slave == slave) {
- struct slave *assigned_slave = rlb_next_rx_slave(bond);
+ struct slave *assigned_slave = alb_get_best_slave(bond, index);
if (assigned_slave) {
rx_hash_table[index].slave = assigned_slave;
@@ -499,7 +481,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
write_lock_bh(&bond->curr_slave_lock);
@@ -558,7 +540,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -576,7 +558,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
*/
bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* The slave was assigned a new mac address - update the clients */
@@ -587,7 +569,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
int ntt = 0;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -607,7 +589,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* mark all clients using src_ip to be updated */
@@ -617,7 +599,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
struct rlb_client_info *client_info;
u32 hash_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -643,7 +625,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/* Caller must hold both bond and ptr locks for read */
@@ -655,7 +637,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
struct rlb_client_info *client_info;
u32 hash_index = 0;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -671,7 +653,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
assigned_slave = client_info->slave;
if (assigned_slave) {
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
} else {
@@ -687,7 +669,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
}
/* assign a new slave */
- assigned_slave = rlb_next_rx_slave(bond);
+ assigned_slave = alb_get_best_slave(bond, hash_index);
if (assigned_slave) {
client_info->ip_src = arp->ip_src;
@@ -723,7 +705,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
return assigned_slave;
}
@@ -771,36 +753,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
return tx_slave;
}
-/* Caller must hold bond lock for read */
-static void rlb_rebalance(struct bonding *bond)
-{
- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
- struct slave *assigned_slave;
- struct rlb_client_info *client_info;
- int ntt;
- u32 hash_index;
-
- _lock_rx_hashtbl(bond);
-
- ntt = 0;
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
- client_info = &(bond_info->rx_hashtbl[hash_index]);
- assigned_slave = rlb_next_rx_slave(bond);
- if (assigned_slave && (client_info->slave != assigned_slave)) {
- client_info->slave = assigned_slave;
- client_info->ntt = 1;
- ntt = 1;
- }
- }
-
- /* update the team's flag only after the whole iteration */
- if (ntt) {
- bond_info->rx_ntt = 1;
- }
- _unlock_rx_hashtbl(bond);
-}
-
/* Caller must hold rx_hashtbl lock */
static void rlb_init_table_entry(struct rlb_client_info *entry)
{
@@ -817,8 +769,6 @@ static int rlb_initialize(struct bonding *bond)
int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
int i;
- spin_lock_init(&(bond_info->rx_hashtbl_lock));
-
new_hashtbl = kmalloc(size, GFP_KERNEL);
if (!new_hashtbl) {
printk(KERN_ERR DRV_NAME
@@ -826,7 +776,7 @@ static int rlb_initialize(struct bonding *bond)
bond->dev->name);
return -1;
}
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
bond_info->rx_hashtbl = new_hashtbl;
@@ -836,7 +786,7 @@ static int rlb_initialize(struct bonding *bond)
rlb_init_table_entry(bond_info->rx_hashtbl + i);
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
/*initialize packet type*/
pk_type->type = cpu_to_be16(ETH_P_ARP);
@@ -855,13 +805,13 @@ static void rlb_deinitialize(struct bonding *bond)
dev_remove_pack(&(bond_info->rlb_pkt_type));
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
kfree(bond_info->rx_hashtbl);
bond_info->rx_hashtbl = NULL;
bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
@@ -869,7 +819,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
u32 curr_index;
- _lock_rx_hashtbl(bond);
+ _lock_hashtbl(bond);
curr_index = bond_info->rx_hashtbl_head;
while (curr_index != RLB_NULL_INDEX) {
@@ -894,7 +844,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
curr_index = next_index;
}
- _unlock_rx_hashtbl(bond);
+ _unlock_hashtbl(bond);
}
/*********************** tlb/rlb shared functions *********************/
@@ -1521,11 +1471,6 @@ void bond_alb_monitor(struct work_struct *work)
read_lock(&bond->lock);
}
- if (bond_info->rlb_rebalance) {
- bond_info->rlb_rebalance = 0;
- rlb_rebalance(bond);
- }
-
/* check if clients need updating */
if (bond_info->rx_ntt) {
if (bond_info->rlb_update_delay_counter) {
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index b65fd29..09d755a 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -90,7 +90,7 @@ struct tlb_slave_info {
struct alb_bond_info {
struct timer_list alb_timer;
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
- spinlock_t tx_hashtbl_lock;
+ spinlock_t hashtbl_lock; /* lock for both tables */
u32 unbalanced_load;
int tx_rebalance_counter;
int lp_counter;
@@ -98,7 +98,6 @@ struct alb_bond_info {
int rlb_enabled;
struct packet_type rlb_pkt_type;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
- spinlock_t rx_hashtbl_lock;
u32 rx_hashtbl_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
--
1.5.5.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/4 v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
2009-09-18 15:56 ` [PATCH 2/4 v3] " Andy Gospodarek
@ 2009-09-28 22:00 ` Andy Gospodarek
2009-09-28 22:09 ` Jay Vosburgh
0 siblings, 1 reply; 8+ messages in thread
From: Andy Gospodarek @ 2009-09-28 22:00 UTC (permalink / raw)
To: Jay Vosburgh, netdev, bonding-devel
On Fri, Sep 18, 2009 at 11:56:45AM -0400, Andy Gospodarek wrote:
> On Fri, Sep 18, 2009 at 11:36:22AM -0400, Andy Gospodarek wrote:
> > On Wed, Sep 16, 2009 at 04:36:09PM -0700, Jay Vosburgh wrote:
> > > Andy Gospodarek <andy@greyhouse.net> wrote:
> > >
> > > >
> > > >Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
> > >
> > > When testing this, I'm getting a lockdep warning. It appears to
> > > be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
> > > in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
> > > acquires the locks in the other order. I applied all four patches, but
> > > it looks like the change that trips lockdep is in this patch (#2).
> > >
> > > I haven't gotten an actual deadlock from this, although it seems
> > > plausible if there are two cpus in bond_alb_xmit at the same time, and
> > > one of them is sending an ARP.
> > >
> > > One fairly straightforward fix would be to combine the rx and tx
> > > hash table locks into a single lock. I suspect that wouldn't have any
> > > real performance penalty, since the rx hash table lock is generally not
> > > acquired very often (unlike the tx lock, which is taken for every packet
> > > that goes out).
> > >
> > > Also, FYI, two of the four patches had trailing whitespace. I
> > > believe it was #2 and #4.
> > >
> > > Thoughts?
> >
> > Jay,
> >
> > This patch should address both the deadlock and whitespace concerns.
> > I ran a kernel with LOCKDEP enabled and saw no warnings while passing
> > traffic on the bond while pulling cables and while removing the module.
> > Here it is....
> >
>
> Adding the version and signed-off-by lines might be nice, eh?
>
> [PATCH v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
>
> I noticed that it was easy for alb (mode 6) bonding to get into a state
> where the tx hash-table and rx hash-table are out of sync (there is
> really nothing to keep them synchronized), and we will transmit traffic
> destined for a host on one slave and send ARP frames to the same slave
> from another interface using a different source MAC.
>
> There is no compelling reason to do this, so this patch makes sure the
> rx hash-table changes whenever the tx hash-table is updated based on
> device load. This patch also drops the code that does rlb re-balancing
> since the balancing will now be controlled by the tx hash-table based on
> transmit load. In order to address an issue found with the initial
> patch, I have also combined the rx and tx hash table lock into a single
> lock. This will facilitate moving these into a single table at some
> point.
>
> Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
>
> ---
> drivers/net/bonding/bond_alb.c | 203 +++++++++++++++-------------------------
> drivers/net/bonding/bond_alb.h | 3 +-
> 2 files changed, 75 insertions(+), 131 deletions(-)
>
> diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
> index bcf25c6..04b7055 100644
> --- a/drivers/net/bonding/bond_alb.c
> +++ b/drivers/net/bonding/bond_alb.c
> @@ -111,6 +111,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
>
> /* Forward declaration */
> static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
> +static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index);
>
> static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
> {
> @@ -124,18 +125,18 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
> return hash;
> }
>
> -/*********************** tlb specific functions ***************************/
> -
> -static inline void _lock_tx_hashtbl(struct bonding *bond)
> +/********************* hash table lock functions *************************/
> +static inline void _lock_hashtbl(struct bonding *bond)
> {
> - spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
> + spin_lock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
> }
>
> -static inline void _unlock_tx_hashtbl(struct bonding *bond)
> +static inline void _unlock_hashtbl(struct bonding *bond)
> {
> - spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
> + spin_unlock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
> }
>
> +/*********************** tlb specific functions ***************************/
> /* Caller must hold tx_hashtbl lock */
> static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
> {
> @@ -163,7 +164,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
> struct tlb_client_info *tx_hash_table;
> u32 index;
>
> - _lock_tx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> /* clear slave from tx_hashtbl */
> tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
> @@ -180,7 +181,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
>
> tlb_init_slave(slave);
>
> - _unlock_tx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* Must be called before starting the monitor timer */
> @@ -191,7 +192,7 @@ static int tlb_initialize(struct bonding *bond)
> struct tlb_client_info *new_hashtbl;
> int i;
>
> - spin_lock_init(&(bond_info->tx_hashtbl_lock));
> + spin_lock_init(&(bond_info->hashtbl_lock));
>
> new_hashtbl = kzalloc(size, GFP_KERNEL);
> if (!new_hashtbl) {
> @@ -200,7 +201,7 @@ static int tlb_initialize(struct bonding *bond)
> bond->dev->name);
> return -1;
> }
> - _lock_tx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> bond_info->tx_hashtbl = new_hashtbl;
>
> @@ -208,7 +209,7 @@ static int tlb_initialize(struct bonding *bond)
> tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
> }
>
> - _unlock_tx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> return 0;
> }
> @@ -218,12 +219,12 @@ static void tlb_deinitialize(struct bonding *bond)
> {
> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>
> - _lock_tx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> kfree(bond_info->tx_hashtbl);
> bond_info->tx_hashtbl = NULL;
>
> - _unlock_tx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* Caller must hold bond lock for read */
> @@ -264,24 +265,6 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
> return least_loaded;
> }
>
> -/* Caller must hold bond lock for read and hashtbl lock */
> -static struct slave *tlb_get_best_slave(struct bonding *bond, u32 hash_index)
> -{
> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> - struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
> - struct slave *last_slave = tx_hash_table[hash_index].last_slave;
> - struct slave *next_slave = NULL;
> -
> - if (last_slave && SLAVE_IS_OK(last_slave)) {
> - /* Use the last slave listed in the tx hashtbl if:
> - the last slave currently is essentially unloaded. */
> - if (SLAVE_TLB_INFO(last_slave).load < 10)
> - next_slave = last_slave;
> - }
> -
> - return next_slave ? next_slave : tlb_get_least_loaded_slave(bond);
> -}
> -
> /* Caller must hold bond lock for read */
> static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
> {
> @@ -289,13 +272,12 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
> struct tlb_client_info *hash_table;
> struct slave *assigned_slave;
>
> - _lock_tx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_table = bond_info->tx_hashtbl;
> assigned_slave = hash_table[hash_index].tx_slave;
> if (!assigned_slave) {
> - assigned_slave = tlb_get_best_slave(bond, hash_index);
> -
> + assigned_slave = alb_get_best_slave(bond, hash_index);
> if (assigned_slave) {
> struct tlb_slave_info *slave_info =
> &(SLAVE_TLB_INFO(assigned_slave));
> @@ -319,20 +301,52 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
> hash_table[hash_index].tx_bytes += skb_len;
> }
>
> - _unlock_tx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> return assigned_slave;
> }
>
> /*********************** rlb specific functions ***************************/
> -static inline void _lock_rx_hashtbl(struct bonding *bond)
> +
> +/* Caller must hold bond lock for read and hashtbl lock */
> +static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index)
> {
> - spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> +
> + /* check rlb table and correct it if wrong */
> + if (bond_info->rlb_enabled) {
> + struct rlb_client_info *rx_client_info = &(bond_info->rx_hashtbl[hash_index]);
> +
> + /* if the new slave computed by tlb checks doesn't match rlb, stop rlb from using it */
> + if (next_slave && (next_slave != rx_client_info->slave))
> + rx_client_info->slave = next_slave;
> + }
> + return next_slave;
> }
>
> -static inline void _unlock_rx_hashtbl(struct bonding *bond)
> +/* Caller must hold bond lock for read and hashtbl lock */
> +static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index)
> {
> - spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> + struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
> + struct slave *last_slave = tx_hash_table[hash_index].last_slave;
> + struct slave *next_slave = NULL;
> +
> + /* presume the next slave will be the least loaded one */
> + next_slave = tlb_get_least_loaded_slave(bond);
> +
> + if (last_slave && SLAVE_IS_OK(last_slave)) {
> + /* Use the last slave listed in the tx hashtbl if:
> + the last slave currently is essentially unloaded. */
> + if (SLAVE_TLB_INFO(last_slave).load < 10)
> + next_slave = last_slave;
> + }
> +
> + /* update the rlb hashtbl if there was a previous entry */
> + if (bond_info->rlb_enabled)
> + rlb_update_rx_table(bond, next_slave, hash_index);
> +
> + return next_slave;
> }
>
> /* when an ARP REPLY is received from a client update its info
> @@ -344,7 +358,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
> struct rlb_client_info *client_info;
> u32 hash_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
> client_info = &(bond_info->rx_hashtbl[hash_index]);
> @@ -358,7 +372,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
> bond_info->rx_ntt = 1;
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev)
> @@ -402,38 +416,6 @@ out:
> return res;
> }
>
> -/* Caller must hold bond lock for read */
> -static struct slave *rlb_next_rx_slave(struct bonding *bond)
> -{
> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> - struct slave *rx_slave, *slave, *start_at;
> - int i = 0;
> -
> - if (bond_info->next_rx_slave) {
> - start_at = bond_info->next_rx_slave;
> - } else {
> - start_at = bond->first_slave;
> - }
> -
> - rx_slave = NULL;
> -
> - bond_for_each_slave_from(bond, slave, i, start_at) {
> - if (SLAVE_IS_OK(slave)) {
> - if (!rx_slave) {
> - rx_slave = slave;
> - } else if (slave->speed > rx_slave->speed) {
> - rx_slave = slave;
> - }
> - }
> - }
> -
> - if (rx_slave) {
> - bond_info->next_rx_slave = rx_slave->next;
> - }
> -
> - return rx_slave;
> -}
> -
> /* teach the switch the mac of a disabled slave
> * on the primary for fault tolerance
> *
> @@ -468,14 +450,14 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
> u32 index, next_index;
>
> /* clear slave from rx_hashtbl */
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> rx_hash_table = bond_info->rx_hashtbl;
> index = bond_info->rx_hashtbl_head;
> for (; index != RLB_NULL_INDEX; index = next_index) {
> next_index = rx_hash_table[index].next;
> if (rx_hash_table[index].slave == slave) {
> - struct slave *assigned_slave = rlb_next_rx_slave(bond);
> + struct slave *assigned_slave = alb_get_best_slave(bond, index);
>
> if (assigned_slave) {
> rx_hash_table[index].slave = assigned_slave;
> @@ -499,7 +481,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
> }
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> write_lock_bh(&bond->curr_slave_lock);
>
> @@ -558,7 +540,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
> struct rlb_client_info *client_info;
> u32 hash_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = bond_info->rx_hashtbl_head;
> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> @@ -576,7 +558,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
> */
> bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* The slave was assigned a new mac address - update the clients */
> @@ -587,7 +569,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
> int ntt = 0;
> u32 hash_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = bond_info->rx_hashtbl_head;
> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> @@ -607,7 +589,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
> bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* mark all clients using src_ip to be updated */
> @@ -617,7 +599,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
> struct rlb_client_info *client_info;
> u32 hash_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = bond_info->rx_hashtbl_head;
> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> @@ -643,7 +625,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
> }
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* Caller must hold both bond and ptr locks for read */
> @@ -655,7 +637,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> struct rlb_client_info *client_info;
> u32 hash_index = 0;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
> client_info = &(bond_info->rx_hashtbl[hash_index]);
> @@ -671,7 +653,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
>
> assigned_slave = client_info->slave;
> if (assigned_slave) {
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> return assigned_slave;
> }
> } else {
> @@ -687,7 +669,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> }
> }
> /* assign a new slave */
> - assigned_slave = rlb_next_rx_slave(bond);
> + assigned_slave = alb_get_best_slave(bond, hash_index);
>
> if (assigned_slave) {
> client_info->ip_src = arp->ip_src;
> @@ -723,7 +705,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> }
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> return assigned_slave;
> }
> @@ -771,36 +753,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
> return tx_slave;
> }
>
> -/* Caller must hold bond lock for read */
> -static void rlb_rebalance(struct bonding *bond)
> -{
> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> - struct slave *assigned_slave;
> - struct rlb_client_info *client_info;
> - int ntt;
> - u32 hash_index;
> -
> - _lock_rx_hashtbl(bond);
> -
> - ntt = 0;
> - hash_index = bond_info->rx_hashtbl_head;
> - for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> - client_info = &(bond_info->rx_hashtbl[hash_index]);
> - assigned_slave = rlb_next_rx_slave(bond);
> - if (assigned_slave && (client_info->slave != assigned_slave)) {
> - client_info->slave = assigned_slave;
> - client_info->ntt = 1;
> - ntt = 1;
> - }
> - }
> -
> - /* update the team's flag only after the whole iteration */
> - if (ntt) {
> - bond_info->rx_ntt = 1;
> - }
> - _unlock_rx_hashtbl(bond);
> -}
> -
> /* Caller must hold rx_hashtbl lock */
> static void rlb_init_table_entry(struct rlb_client_info *entry)
> {
> @@ -817,8 +769,6 @@ static int rlb_initialize(struct bonding *bond)
> int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
> int i;
>
> - spin_lock_init(&(bond_info->rx_hashtbl_lock));
> -
> new_hashtbl = kmalloc(size, GFP_KERNEL);
> if (!new_hashtbl) {
> printk(KERN_ERR DRV_NAME
> @@ -826,7 +776,7 @@ static int rlb_initialize(struct bonding *bond)
> bond->dev->name);
> return -1;
> }
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> bond_info->rx_hashtbl = new_hashtbl;
>
> @@ -836,7 +786,7 @@ static int rlb_initialize(struct bonding *bond)
> rlb_init_table_entry(bond_info->rx_hashtbl + i);
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> /*initialize packet type*/
> pk_type->type = cpu_to_be16(ETH_P_ARP);
> @@ -855,13 +805,13 @@ static void rlb_deinitialize(struct bonding *bond)
>
> dev_remove_pack(&(bond_info->rlb_pkt_type));
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> kfree(bond_info->rx_hashtbl);
> bond_info->rx_hashtbl = NULL;
> bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> @@ -869,7 +819,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> u32 curr_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> curr_index = bond_info->rx_hashtbl_head;
> while (curr_index != RLB_NULL_INDEX) {
> @@ -894,7 +844,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> curr_index = next_index;
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /*********************** tlb/rlb shared functions *********************/
> @@ -1521,11 +1471,6 @@ void bond_alb_monitor(struct work_struct *work)
> read_lock(&bond->lock);
> }
>
> - if (bond_info->rlb_rebalance) {
> - bond_info->rlb_rebalance = 0;
> - rlb_rebalance(bond);
> - }
> -
> /* check if clients need updating */
> if (bond_info->rx_ntt) {
> if (bond_info->rlb_update_delay_counter) {
> diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
> index b65fd29..09d755a 100644
> --- a/drivers/net/bonding/bond_alb.h
> +++ b/drivers/net/bonding/bond_alb.h
> @@ -90,7 +90,7 @@ struct tlb_slave_info {
> struct alb_bond_info {
> struct timer_list alb_timer;
> struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
> - spinlock_t tx_hashtbl_lock;
> + spinlock_t hashtbl_lock; /* lock for both tables */
> u32 unbalanced_load;
> int tx_rebalance_counter;
> int lp_counter;
> @@ -98,7 +98,6 @@ struct alb_bond_info {
> int rlb_enabled;
> struct packet_type rlb_pkt_type;
> struct rlb_client_info *rx_hashtbl; /* Receive hash table */
> - spinlock_t rx_hashtbl_lock;
> u32 rx_hashtbl_head;
> u8 rx_ntt; /* flag - need to transmit
> * to all rx clients
Any thoughts on this, Jay?
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/4 v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
2009-09-28 22:00 ` Andy Gospodarek
@ 2009-09-28 22:09 ` Jay Vosburgh
2009-09-29 0:13 ` Andy Gospodarek
0 siblings, 1 reply; 8+ messages in thread
From: Jay Vosburgh @ 2009-09-28 22:09 UTC (permalink / raw)
To: Andy Gospodarek; +Cc: netdev, bonding-devel
Andy Gospodarek <andy@greyhouse.net> wrote:
>On Fri, Sep 18, 2009 at 11:56:45AM -0400, Andy Gospodarek wrote:
>> On Fri, Sep 18, 2009 at 11:36:22AM -0400, Andy Gospodarek wrote:
>> > On Wed, Sep 16, 2009 at 04:36:09PM -0700, Jay Vosburgh wrote:
>> > > Andy Gospodarek <andy@greyhouse.net> wrote:
>> > >
>> > > >
>> > > >Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
>> > >
>> > > When testing this, I'm getting a lockdep warning. It appears to
>> > > be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
>> > > in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
>> > > acquires the locks in the other order. I applied all four patches, but
>> > > it looks like the change that trips lockdep is in this patch (#2).
>> > >
>> > > I haven't gotten an actual deadlock from this, although it seems
>> > > plausible if there are two cpus in bond_alb_xmit at the same time, and
>> > > one of them is sending an ARP.
>> > >
>> > > One fairly straightforward fix would be to combine the rx and tx
>> > > hash table locks into a single lock. I suspect that wouldn't have any
>> > > real performance penalty, since the rx hash table lock is generally not
>> > > acquired very often (unlike the tx lock, which is taken for every packet
>> > > that goes out).
>> > >
>> > > Also, FYI, two of the four patches had trailing whitespace. I
>> > > believe it was #2 and #4.
>> > >
>> > > Thoughts?
>> >
>> > Jay,
>> >
>> > This patch should address both the deadlock and whitespace concerns.
>> > I ran a kernel with LOCKDEP enabled and saw no warnings while passing
>> > traffic on the bond while pulling cables and while removing the module.
>> > Here it is....
>> >
>>
>> Adding the version and signed-off-by lines might be nice, eh?
>>
>> [PATCH v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
>>
>> I noticed that it was easy for alb (mode 6) bonding to get into a state
>> where the tx hash-table and rx hash-table are out of sync (there is
>> really nothing to keep them synchronized), and we will transmit traffic
>> destined for a host on one slave and send ARP frames to the same slave
>> from another interface using a different source MAC.
>>
>> There is no compelling reason to do this, so this patch makes sure the
>> rx hash-table changes whenever the tx hash-table is updated based on
>> device load. This patch also drops the code that does rlb re-balancing
>> since the balancing will not be controlled by the tx hash-table based on
In addition to my response in the other thread, I changed the
"not" above to "now," which I suspect is what you meant.
>> transmit load. In order to address an issue found with the initial
>> patch, I have also combined the rx and tx hash table lock into a single
>> lock. This will facilitate moving these into a single table at some
>> point.
>>
>> Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
>>
>> ---
>> drivers/net/bonding/bond_alb.c | 203 +++++++++++++++-------------------------
>> drivers/net/bonding/bond_alb.h | 3 +-
>> 2 files changed, 75 insertions(+), 131 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>> index bcf25c6..04b7055 100644
>> --- a/drivers/net/bonding/bond_alb.c
>> +++ b/drivers/net/bonding/bond_alb.c
>> @@ -111,6 +111,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
>>
>> /* Forward declaration */
>> static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
>> +static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index);
>>
>> static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
>> {
>> @@ -124,18 +125,18 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
>> return hash;
>> }
>>
>> -/*********************** tlb specific functions ***************************/
>> -
>> -static inline void _lock_tx_hashtbl(struct bonding *bond)
>> +/********************* hash table lock functions *************************/
>> +static inline void _lock_hashtbl(struct bonding *bond)
>> {
>> - spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
>> + spin_lock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
>> }
>>
>> -static inline void _unlock_tx_hashtbl(struct bonding *bond)
>> +static inline void _unlock_hashtbl(struct bonding *bond)
>> {
>> - spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
>> + spin_unlock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
>> }
>>
>> +/*********************** tlb specific functions ***************************/
>> /* Caller must hold tx_hashtbl lock */
>> static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
>> {
>> @@ -163,7 +164,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
>> struct tlb_client_info *tx_hash_table;
>> u32 index;
>>
>> - _lock_tx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> /* clear slave from tx_hashtbl */
>> tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
>> @@ -180,7 +181,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
>>
>> tlb_init_slave(slave);
>>
>> - _unlock_tx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>> }
>>
>> /* Must be called before starting the monitor timer */
>> @@ -191,7 +192,7 @@ static int tlb_initialize(struct bonding *bond)
>> struct tlb_client_info *new_hashtbl;
>> int i;
>>
>> - spin_lock_init(&(bond_info->tx_hashtbl_lock));
>> + spin_lock_init(&(bond_info->hashtbl_lock));
>>
>> new_hashtbl = kzalloc(size, GFP_KERNEL);
>> if (!new_hashtbl) {
>> @@ -200,7 +201,7 @@ static int tlb_initialize(struct bonding *bond)
>> bond->dev->name);
>> return -1;
>> }
>> - _lock_tx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> bond_info->tx_hashtbl = new_hashtbl;
>>
>> @@ -208,7 +209,7 @@ static int tlb_initialize(struct bonding *bond)
>> tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
>> }
>>
>> - _unlock_tx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>>
>> return 0;
>> }
>> @@ -218,12 +219,12 @@ static void tlb_deinitialize(struct bonding *bond)
>> {
>> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>>
>> - _lock_tx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> kfree(bond_info->tx_hashtbl);
>> bond_info->tx_hashtbl = NULL;
>>
>> - _unlock_tx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>> }
>>
>> /* Caller must hold bond lock for read */
>> @@ -264,24 +265,6 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
>> return least_loaded;
>> }
>>
>> -/* Caller must hold bond lock for read and hashtbl lock */
>> -static struct slave *tlb_get_best_slave(struct bonding *bond, u32 hash_index)
>> -{
>> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>> - struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
>> - struct slave *last_slave = tx_hash_table[hash_index].last_slave;
>> - struct slave *next_slave = NULL;
>> -
>> - if (last_slave && SLAVE_IS_OK(last_slave)) {
>> - /* Use the last slave listed in the tx hashtbl if:
>> - the last slave currently is essentially unloaded. */
>> - if (SLAVE_TLB_INFO(last_slave).load < 10)
>> - next_slave = last_slave;
>> - }
>> -
>> - return next_slave ? next_slave : tlb_get_least_loaded_slave(bond);
>> -}
>> -
>> /* Caller must hold bond lock for read */
>> static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
>> {
>> @@ -289,13 +272,12 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
>> struct tlb_client_info *hash_table;
>> struct slave *assigned_slave;
>>
>> - _lock_tx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> hash_table = bond_info->tx_hashtbl;
>> assigned_slave = hash_table[hash_index].tx_slave;
>> if (!assigned_slave) {
>> - assigned_slave = tlb_get_best_slave(bond, hash_index);
>> -
>> + assigned_slave = alb_get_best_slave(bond, hash_index);
>> if (assigned_slave) {
>> struct tlb_slave_info *slave_info =
>> &(SLAVE_TLB_INFO(assigned_slave));
>> @@ -319,20 +301,52 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
>> hash_table[hash_index].tx_bytes += skb_len;
>> }
>>
>> - _unlock_tx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>>
>> return assigned_slave;
>> }
>>
>> /*********************** rlb specific functions ***************************/
>> -static inline void _lock_rx_hashtbl(struct bonding *bond)
>> +
>> +/* Caller must hold bond lock for read and hashtbl lock */
>> +static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index)
>> {
>> - spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
>> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>> +
>> + /* check rlb table and correct it if wrong */
>> + if (bond_info->rlb_enabled) {
>> + struct rlb_client_info *rx_client_info = &(bond_info->rx_hashtbl[hash_index]);
>> +
>> + /* if the new slave computed by tlb checks doesn't match rlb, stop rlb from using it */
>> + if (next_slave && (next_slave != rx_client_info->slave))
>> + rx_client_info->slave = next_slave;
>> + }
>> + return next_slave;
>> }
>>
>> -static inline void _unlock_rx_hashtbl(struct bonding *bond)
>> +/* Caller must hold bond lock for read and hashtbl lock */
>> +static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index)
>> {
>> - spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
>> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>> + struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
>> + struct slave *last_slave = tx_hash_table[hash_index].last_slave;
>> + struct slave *next_slave = NULL;
>> +
>> + /* presume the next slave will be the least loaded one */
>> + next_slave = tlb_get_least_loaded_slave(bond);
>> +
>> + if (last_slave && SLAVE_IS_OK(last_slave)) {
>> + /* Use the last slave listed in the tx hashtbl if:
>> + the last slave currently is essentially unloaded. */
>> + if (SLAVE_TLB_INFO(last_slave).load < 10)
>> + next_slave = last_slave;
>> + }
>> +
>> + /* update the rlb hashtbl if there was a previous entry */
>> + if (bond_info->rlb_enabled)
>> + rlb_update_rx_table(bond, next_slave, hash_index);
>> +
>> + return next_slave;
>> }
>>
>> /* when an ARP REPLY is received from a client update its info
>> @@ -344,7 +358,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
>> struct rlb_client_info *client_info;
>> u32 hash_index;
>>
>> - _lock_rx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
>> client_info = &(bond_info->rx_hashtbl[hash_index]);
>> @@ -358,7 +372,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
>> bond_info->rx_ntt = 1;
>> }
>>
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>> }
>>
>> static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev)
>> @@ -402,38 +416,6 @@ out:
>> return res;
>> }
>>
>> -/* Caller must hold bond lock for read */
>> -static struct slave *rlb_next_rx_slave(struct bonding *bond)
>> -{
>> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>> - struct slave *rx_slave, *slave, *start_at;
>> - int i = 0;
>> -
>> - if (bond_info->next_rx_slave) {
>> - start_at = bond_info->next_rx_slave;
>> - } else {
>> - start_at = bond->first_slave;
>> - }
>> -
>> - rx_slave = NULL;
>> -
>> - bond_for_each_slave_from(bond, slave, i, start_at) {
>> - if (SLAVE_IS_OK(slave)) {
>> - if (!rx_slave) {
>> - rx_slave = slave;
>> - } else if (slave->speed > rx_slave->speed) {
>> - rx_slave = slave;
>> - }
>> - }
>> - }
>> -
>> - if (rx_slave) {
>> - bond_info->next_rx_slave = rx_slave->next;
>> - }
>> -
>> - return rx_slave;
>> -}
>> -
>> /* teach the switch the mac of a disabled slave
>> * on the primary for fault tolerance
>> *
>> @@ -468,14 +450,14 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
>> u32 index, next_index;
>>
>> /* clear slave from rx_hashtbl */
>> - _lock_rx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> rx_hash_table = bond_info->rx_hashtbl;
>> index = bond_info->rx_hashtbl_head;
>> for (; index != RLB_NULL_INDEX; index = next_index) {
>> next_index = rx_hash_table[index].next;
>> if (rx_hash_table[index].slave == slave) {
>> - struct slave *assigned_slave = rlb_next_rx_slave(bond);
>> + struct slave *assigned_slave = alb_get_best_slave(bond, index);
>>
>> if (assigned_slave) {
>> rx_hash_table[index].slave = assigned_slave;
>> @@ -499,7 +481,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
>> }
>> }
>>
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>>
>> write_lock_bh(&bond->curr_slave_lock);
>>
>> @@ -558,7 +540,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
>> struct rlb_client_info *client_info;
>> u32 hash_index;
>>
>> - _lock_rx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> hash_index = bond_info->rx_hashtbl_head;
>> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
>> @@ -576,7 +558,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
>> */
>> bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
>>
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>> }
>>
>> /* The slave was assigned a new mac address - update the clients */
>> @@ -587,7 +569,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
>> int ntt = 0;
>> u32 hash_index;
>>
>> - _lock_rx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> hash_index = bond_info->rx_hashtbl_head;
>> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
>> @@ -607,7 +589,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
>> bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
>> }
>>
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>> }
>>
>> /* mark all clients using src_ip to be updated */
>> @@ -617,7 +599,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
>> struct rlb_client_info *client_info;
>> u32 hash_index;
>>
>> - _lock_rx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> hash_index = bond_info->rx_hashtbl_head;
>> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
>> @@ -643,7 +625,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
>> }
>> }
>>
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>> }
>>
>> /* Caller must hold both bond and ptr locks for read */
>> @@ -655,7 +637,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
>> struct rlb_client_info *client_info;
>> u32 hash_index = 0;
>>
>> - _lock_rx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
>> client_info = &(bond_info->rx_hashtbl[hash_index]);
>> @@ -671,7 +653,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
>>
>> assigned_slave = client_info->slave;
>> if (assigned_slave) {
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>> return assigned_slave;
>> }
>> } else {
>> @@ -687,7 +669,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
>> }
>> }
>> /* assign a new slave */
>> - assigned_slave = rlb_next_rx_slave(bond);
>> + assigned_slave = alb_get_best_slave(bond, hash_index);
>>
>> if (assigned_slave) {
>> client_info->ip_src = arp->ip_src;
>> @@ -723,7 +705,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
>> }
>> }
>>
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>>
>> return assigned_slave;
>> }
>> @@ -771,36 +753,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
>> return tx_slave;
>> }
>>
>> -/* Caller must hold bond lock for read */
>> -static void rlb_rebalance(struct bonding *bond)
>> -{
>> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>> - struct slave *assigned_slave;
>> - struct rlb_client_info *client_info;
>> - int ntt;
>> - u32 hash_index;
>> -
>> - _lock_rx_hashtbl(bond);
>> -
>> - ntt = 0;
>> - hash_index = bond_info->rx_hashtbl_head;
>> - for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
>> - client_info = &(bond_info->rx_hashtbl[hash_index]);
>> - assigned_slave = rlb_next_rx_slave(bond);
>> - if (assigned_slave && (client_info->slave != assigned_slave)) {
>> - client_info->slave = assigned_slave;
>> - client_info->ntt = 1;
>> - ntt = 1;
>> - }
>> - }
>> -
>> - /* update the team's flag only after the whole iteration */
>> - if (ntt) {
>> - bond_info->rx_ntt = 1;
>> - }
>> - _unlock_rx_hashtbl(bond);
>> -}
>> -
>> /* Caller must hold rx_hashtbl lock */
>> static void rlb_init_table_entry(struct rlb_client_info *entry)
>> {
>> @@ -817,8 +769,6 @@ static int rlb_initialize(struct bonding *bond)
>> int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
>> int i;
>>
>> - spin_lock_init(&(bond_info->rx_hashtbl_lock));
>> -
>> new_hashtbl = kmalloc(size, GFP_KERNEL);
>> if (!new_hashtbl) {
>> printk(KERN_ERR DRV_NAME
>> @@ -826,7 +776,7 @@ static int rlb_initialize(struct bonding *bond)
>> bond->dev->name);
>> return -1;
>> }
>> - _lock_rx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> bond_info->rx_hashtbl = new_hashtbl;
>>
>> @@ -836,7 +786,7 @@ static int rlb_initialize(struct bonding *bond)
>> rlb_init_table_entry(bond_info->rx_hashtbl + i);
>> }
>>
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>>
>> /*initialize packet type*/
>> pk_type->type = cpu_to_be16(ETH_P_ARP);
>> @@ -855,13 +805,13 @@ static void rlb_deinitialize(struct bonding *bond)
>>
>> dev_remove_pack(&(bond_info->rlb_pkt_type));
>>
>> - _lock_rx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> kfree(bond_info->rx_hashtbl);
>> bond_info->rx_hashtbl = NULL;
>> bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
>>
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>> }
>>
>> static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
>> @@ -869,7 +819,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
>> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>> u32 curr_index;
>>
>> - _lock_rx_hashtbl(bond);
>> + _lock_hashtbl(bond);
>>
>> curr_index = bond_info->rx_hashtbl_head;
>> while (curr_index != RLB_NULL_INDEX) {
>> @@ -894,7 +844,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
>> curr_index = next_index;
>> }
>>
>> - _unlock_rx_hashtbl(bond);
>> + _unlock_hashtbl(bond);
>> }
>>
>> /*********************** tlb/rlb shared functions *********************/
>> @@ -1521,11 +1471,6 @@ void bond_alb_monitor(struct work_struct *work)
>> read_lock(&bond->lock);
>> }
>>
>> - if (bond_info->rlb_rebalance) {
>> - bond_info->rlb_rebalance = 0;
>> - rlb_rebalance(bond);
>> - }
>> -
>> /* check if clients need updating */
>> if (bond_info->rx_ntt) {
>> if (bond_info->rlb_update_delay_counter) {
>> diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>> index b65fd29..09d755a 100644
>> --- a/drivers/net/bonding/bond_alb.h
>> +++ b/drivers/net/bonding/bond_alb.h
>> @@ -90,7 +90,7 @@ struct tlb_slave_info {
>> struct alb_bond_info {
>> struct timer_list alb_timer;
>> struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
>> - spinlock_t tx_hashtbl_lock;
>> + spinlock_t hashtbl_lock; /* lock for both tables */
>> u32 unbalanced_load;
>> int tx_rebalance_counter;
>> int lp_counter;
>> @@ -98,7 +98,6 @@ struct alb_bond_info {
>> int rlb_enabled;
>> struct packet_type rlb_pkt_type;
>> struct rlb_client_info *rx_hashtbl; /* Receive hash table */
>> - spinlock_t rx_hashtbl_lock;
>> u32 rx_hashtbl_head;
>> u8 rx_ntt; /* flag - need to transmit
>> * to all rx clients
>
>Any thoughts on this, Jay?
-J
---
-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/4 v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
2009-09-28 22:09 ` Jay Vosburgh
@ 2009-09-29 0:13 ` Andy Gospodarek
0 siblings, 0 replies; 8+ messages in thread
From: Andy Gospodarek @ 2009-09-29 0:13 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: Andy Gospodarek, netdev, bonding-devel
On Mon, Sep 28, 2009 at 03:09:54PM -0700, Jay Vosburgh wrote:
> Andy Gospodarek <andy@greyhouse.net> wrote:
>
> >On Fri, Sep 18, 2009 at 11:56:45AM -0400, Andy Gospodarek wrote:
> >> On Fri, Sep 18, 2009 at 11:36:22AM -0400, Andy Gospodarek wrote:
> >> > On Wed, Sep 16, 2009 at 04:36:09PM -0700, Jay Vosburgh wrote:
> >> > > Andy Gospodarek <andy@greyhouse.net> wrote:
> >> > >
> >> > > >
> >> > > >Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
> >> > >
> >> > > When testing this, I'm getting a lockdep warning. It appears to
> >> > > be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
> >> > > in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
> >> > > acquires the locks in the other order. I applied all four patches, but
> >> > > it looks like the change that trips lockdep is in this patch (#2).
> >> > >
> >> > > I haven't gotten an actual deadlock from this, although it seems
> >> > > plausible if there are two cpus in bond_alb_xmit at the same time, and
> >> > > one of them is sending an ARP.
> >> > >
> >> > > One fairly straightforward fix would be to combine the rx and tx
> >> > > hash table locks into a single lock. I suspect that wouldn't have any
> >> > > real performance penalty, since the rx hash table lock is generally not
> >> > > acquired very often (unlike the tx lock, which is taken for every packet
> >> > > that goes out).
> >> > >
> >> > > Also, FYI, two of the four patches had trailing whitespace. I
> >> > > believe it was #2 and #4.
> >> > >
> >> > > Thoughts?
> >> >
> >> > Jay,
> >> >
> >> > This patch should address both the deadlock and whitespace concerns.
> >> > I ran a kernel with LOCKDEP enabled and saw no warnings while passing
> >> > traffic on the bond while pulling cables and while removing the module.
> >> > Here it is....
> >> >
> >>
> >> Adding the version and signed-off-by lines might be nice, eh?
> >>
> >> [PATCH v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
> >>
> >> I noticed that it was easy for alb (mode 6) bonding to get into a state
> >> where the tx hash-table and rx hash-table are out of sync (there is
> >> really nothing to keep them synchronized), and we will transmit traffic
> >> destined for a host on one slave and send ARP frames to the same slave
> >> from another interface using a different source MAC.
> >>
> >> There is no compelling reason to do this, so this patch makes sure the
> >> rx hash-table changes whenever the tx hash-table is updated based on
> >> device load. This patch also drops the code that does rlb re-balancing
> >> since the balancing will not be controlled by the tx hash-table based on
>
> In addition to my response in the other thread, I changed the
> "not" above to "now," which I suspect is what you meant.
>
You are correct. Thanks for catching that!
> >> transmit load. In order to address an issue found with the initial
> >> patch, I have also combined the rx and tx hash table lock into a single
> >> lock. This will facilitate moving these into a single table at some
> >> point.
> >>
> >> Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
> >>
> >> ---
> >> drivers/net/bonding/bond_alb.c | 203 +++++++++++++++-------------------------
> >> drivers/net/bonding/bond_alb.h | 3 +-
> >> 2 files changed, 75 insertions(+), 131 deletions(-)
> >>
> >> diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
> >> index bcf25c6..04b7055 100644
> >> --- a/drivers/net/bonding/bond_alb.c
> >> +++ b/drivers/net/bonding/bond_alb.c
> >> @@ -111,6 +111,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
> >>
> >> /* Forward declaration */
> >> static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
> >> +static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index);
> >>
> >> static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
> >> {
> >> @@ -124,18 +125,18 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
> >> return hash;
> >> }
> >>
> >> -/*********************** tlb specific functions ***************************/
> >> -
> >> -static inline void _lock_tx_hashtbl(struct bonding *bond)
> >> +/********************* hash table lock functions *************************/
> >> +static inline void _lock_hashtbl(struct bonding *bond)
> >> {
> >> - spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
> >> + spin_lock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
> >> }
> >>
> >> -static inline void _unlock_tx_hashtbl(struct bonding *bond)
> >> +static inline void _unlock_hashtbl(struct bonding *bond)
> >> {
> >> - spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
> >> + spin_unlock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
> >> }
> >>
> >> +/*********************** tlb specific functions ***************************/
> >> /* Caller must hold tx_hashtbl lock */
> >> static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
> >> {
> >> @@ -163,7 +164,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
> >> struct tlb_client_info *tx_hash_table;
> >> u32 index;
> >>
> >> - _lock_tx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> /* clear slave from tx_hashtbl */
> >> tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
> >> @@ -180,7 +181,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
> >>
> >> tlb_init_slave(slave);
> >>
> >> - _unlock_tx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >> }
> >>
> >> /* Must be called before starting the monitor timer */
> >> @@ -191,7 +192,7 @@ static int tlb_initialize(struct bonding *bond)
> >> struct tlb_client_info *new_hashtbl;
> >> int i;
> >>
> >> - spin_lock_init(&(bond_info->tx_hashtbl_lock));
> >> + spin_lock_init(&(bond_info->hashtbl_lock));
> >>
> >> new_hashtbl = kzalloc(size, GFP_KERNEL);
> >> if (!new_hashtbl) {
> >> @@ -200,7 +201,7 @@ static int tlb_initialize(struct bonding *bond)
> >> bond->dev->name);
> >> return -1;
> >> }
> >> - _lock_tx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> bond_info->tx_hashtbl = new_hashtbl;
> >>
> >> @@ -208,7 +209,7 @@ static int tlb_initialize(struct bonding *bond)
> >> tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
> >> }
> >>
> >> - _unlock_tx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >>
> >> return 0;
> >> }
> >> @@ -218,12 +219,12 @@ static void tlb_deinitialize(struct bonding *bond)
> >> {
> >> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> >>
> >> - _lock_tx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> kfree(bond_info->tx_hashtbl);
> >> bond_info->tx_hashtbl = NULL;
> >>
> >> - _unlock_tx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >> }
> >>
> >> /* Caller must hold bond lock for read */
> >> @@ -264,24 +265,6 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
> >> return least_loaded;
> >> }
> >>
> >> -/* Caller must hold bond lock for read and hashtbl lock */
> >> -static struct slave *tlb_get_best_slave(struct bonding *bond, u32 hash_index)
> >> -{
> >> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> >> - struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
> >> - struct slave *last_slave = tx_hash_table[hash_index].last_slave;
> >> - struct slave *next_slave = NULL;
> >> -
> >> - if (last_slave && SLAVE_IS_OK(last_slave)) {
> >> - /* Use the last slave listed in the tx hashtbl if:
> >> - the last slave currently is essentially unloaded. */
> >> - if (SLAVE_TLB_INFO(last_slave).load < 10)
> >> - next_slave = last_slave;
> >> - }
> >> -
> >> - return next_slave ? next_slave : tlb_get_least_loaded_slave(bond);
> >> -}
> >> -
> >> /* Caller must hold bond lock for read */
> >> static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
> >> {
> >> @@ -289,13 +272,12 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
> >> struct tlb_client_info *hash_table;
> >> struct slave *assigned_slave;
> >>
> >> - _lock_tx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> hash_table = bond_info->tx_hashtbl;
> >> assigned_slave = hash_table[hash_index].tx_slave;
> >> if (!assigned_slave) {
> >> - assigned_slave = tlb_get_best_slave(bond, hash_index);
> >> -
> >> + assigned_slave = alb_get_best_slave(bond, hash_index);
> >> if (assigned_slave) {
> >> struct tlb_slave_info *slave_info =
> >> &(SLAVE_TLB_INFO(assigned_slave));
> >> @@ -319,20 +301,52 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
> >> hash_table[hash_index].tx_bytes += skb_len;
> >> }
> >>
> >> - _unlock_tx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >>
> >> return assigned_slave;
> >> }
> >>
> >> /*********************** rlb specific functions ***************************/
> >> -static inline void _lock_rx_hashtbl(struct bonding *bond)
> >> +
> >> +/* Caller must hold bond lock for read and hashtbl lock */
> >> +static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index)
> >> {
> >> - spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
> >> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> >> +
> >> + /* check rlb table and correct it if wrong */
> >> + if (bond_info->rlb_enabled) {
> >> + struct rlb_client_info *rx_client_info = &(bond_info->rx_hashtbl[hash_index]);
> >> +
> >> + /* if the new slave computed by tlb checks doesn't match rlb, stop rlb from using it */
> >> + if (next_slave && (next_slave != rx_client_info->slave))
> >> + rx_client_info->slave = next_slave;
> >> + }
> >> + return next_slave;
> >> }
> >>
> >> -static inline void _unlock_rx_hashtbl(struct bonding *bond)
> >> +/* Caller must hold bond lock for read and hashtbl lock */
> >> +static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index)
> >> {
> >> - spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
> >> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> >> + struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
> >> + struct slave *last_slave = tx_hash_table[hash_index].last_slave;
> >> + struct slave *next_slave = NULL;
> >> +
> >> + /* presume the next slave will be the least loaded one */
> >> + next_slave = tlb_get_least_loaded_slave(bond);
> >> +
> >> + if (last_slave && SLAVE_IS_OK(last_slave)) {
> >> + /* Use the last slave listed in the tx hashtbl if:
> >> + the last slave currently is essentially unloaded. */
> >> + if (SLAVE_TLB_INFO(last_slave).load < 10)
> >> + next_slave = last_slave;
> >> + }
> >> +
> >> + /* update the rlb hashtbl if there was a previous entry */
> >> + if (bond_info->rlb_enabled)
> >> + rlb_update_rx_table(bond, next_slave, hash_index);
> >> +
> >> + return next_slave;
> >> }
> >>
> >> /* when an ARP REPLY is received from a client update its info
> >> @@ -344,7 +358,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
> >> struct rlb_client_info *client_info;
> >> u32 hash_index;
> >>
> >> - _lock_rx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
> >> client_info = &(bond_info->rx_hashtbl[hash_index]);
> >> @@ -358,7 +372,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
> >> bond_info->rx_ntt = 1;
> >> }
> >>
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >> }
> >>
> >> static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev)
> >> @@ -402,38 +416,6 @@ out:
> >> return res;
> >> }
> >>
> >> -/* Caller must hold bond lock for read */
> >> -static struct slave *rlb_next_rx_slave(struct bonding *bond)
> >> -{
> >> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> >> - struct slave *rx_slave, *slave, *start_at;
> >> - int i = 0;
> >> -
> >> - if (bond_info->next_rx_slave) {
> >> - start_at = bond_info->next_rx_slave;
> >> - } else {
> >> - start_at = bond->first_slave;
> >> - }
> >> -
> >> - rx_slave = NULL;
> >> -
> >> - bond_for_each_slave_from(bond, slave, i, start_at) {
> >> - if (SLAVE_IS_OK(slave)) {
> >> - if (!rx_slave) {
> >> - rx_slave = slave;
> >> - } else if (slave->speed > rx_slave->speed) {
> >> - rx_slave = slave;
> >> - }
> >> - }
> >> - }
> >> -
> >> - if (rx_slave) {
> >> - bond_info->next_rx_slave = rx_slave->next;
> >> - }
> >> -
> >> - return rx_slave;
> >> -}
> >> -
> >> /* teach the switch the mac of a disabled slave
> >> * on the primary for fault tolerance
> >> *
> >> @@ -468,14 +450,14 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
> >> u32 index, next_index;
> >>
> >> /* clear slave from rx_hashtbl */
> >> - _lock_rx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> rx_hash_table = bond_info->rx_hashtbl;
> >> index = bond_info->rx_hashtbl_head;
> >> for (; index != RLB_NULL_INDEX; index = next_index) {
> >> next_index = rx_hash_table[index].next;
> >> if (rx_hash_table[index].slave == slave) {
> >> - struct slave *assigned_slave = rlb_next_rx_slave(bond);
> >> + struct slave *assigned_slave = alb_get_best_slave(bond, index);
> >>
> >> if (assigned_slave) {
> >> rx_hash_table[index].slave = assigned_slave;
> >> @@ -499,7 +481,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
> >> }
> >> }
> >>
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >>
> >> write_lock_bh(&bond->curr_slave_lock);
> >>
> >> @@ -558,7 +540,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
> >> struct rlb_client_info *client_info;
> >> u32 hash_index;
> >>
> >> - _lock_rx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> hash_index = bond_info->rx_hashtbl_head;
> >> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> >> @@ -576,7 +558,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
> >> */
> >> bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
> >>
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >> }
> >>
> >> /* The slave was assigned a new mac address - update the clients */
> >> @@ -587,7 +569,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
> >> int ntt = 0;
> >> u32 hash_index;
> >>
> >> - _lock_rx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> hash_index = bond_info->rx_hashtbl_head;
> >> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> >> @@ -607,7 +589,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
> >> bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
> >> }
> >>
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >> }
> >>
> >> /* mark all clients using src_ip to be updated */
> >> @@ -617,7 +599,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
> >> struct rlb_client_info *client_info;
> >> u32 hash_index;
> >>
> >> - _lock_rx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> hash_index = bond_info->rx_hashtbl_head;
> >> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> >> @@ -643,7 +625,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
> >> }
> >> }
> >>
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >> }
> >>
> >> /* Caller must hold both bond and ptr locks for read */
> >> @@ -655,7 +637,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> >> struct rlb_client_info *client_info;
> >> u32 hash_index = 0;
> >>
> >> - _lock_rx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
> >> client_info = &(bond_info->rx_hashtbl[hash_index]);
> >> @@ -671,7 +653,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> >>
> >> assigned_slave = client_info->slave;
> >> if (assigned_slave) {
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >> return assigned_slave;
> >> }
> >> } else {
> >> @@ -687,7 +669,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> >> }
> >> }
> >> /* assign a new slave */
> >> - assigned_slave = rlb_next_rx_slave(bond);
> >> + assigned_slave = alb_get_best_slave(bond, hash_index);
> >>
> >> if (assigned_slave) {
> >> client_info->ip_src = arp->ip_src;
> >> @@ -723,7 +705,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> >> }
> >> }
> >>
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >>
> >> return assigned_slave;
> >> }
> >> @@ -771,36 +753,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
> >> return tx_slave;
> >> }
> >>
> >> -/* Caller must hold bond lock for read */
> >> -static void rlb_rebalance(struct bonding *bond)
> >> -{
> >> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> >> - struct slave *assigned_slave;
> >> - struct rlb_client_info *client_info;
> >> - int ntt;
> >> - u32 hash_index;
> >> -
> >> - _lock_rx_hashtbl(bond);
> >> -
> >> - ntt = 0;
> >> - hash_index = bond_info->rx_hashtbl_head;
> >> - for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> >> - client_info = &(bond_info->rx_hashtbl[hash_index]);
> >> - assigned_slave = rlb_next_rx_slave(bond);
> >> - if (assigned_slave && (client_info->slave != assigned_slave)) {
> >> - client_info->slave = assigned_slave;
> >> - client_info->ntt = 1;
> >> - ntt = 1;
> >> - }
> >> - }
> >> -
> >> - /* update the team's flag only after the whole iteration */
> >> - if (ntt) {
> >> - bond_info->rx_ntt = 1;
> >> - }
> >> - _unlock_rx_hashtbl(bond);
> >> -}
> >> -
> >> /* Caller must hold rx_hashtbl lock */
> >> static void rlb_init_table_entry(struct rlb_client_info *entry)
> >> {
> >> @@ -817,8 +769,6 @@ static int rlb_initialize(struct bonding *bond)
> >> int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
> >> int i;
> >>
> >> - spin_lock_init(&(bond_info->rx_hashtbl_lock));
> >> -
> >> new_hashtbl = kmalloc(size, GFP_KERNEL);
> >> if (!new_hashtbl) {
> >> printk(KERN_ERR DRV_NAME
> >> @@ -826,7 +776,7 @@ static int rlb_initialize(struct bonding *bond)
> >> bond->dev->name);
> >> return -1;
> >> }
> >> - _lock_rx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> bond_info->rx_hashtbl = new_hashtbl;
> >>
> >> @@ -836,7 +786,7 @@ static int rlb_initialize(struct bonding *bond)
> >> rlb_init_table_entry(bond_info->rx_hashtbl + i);
> >> }
> >>
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >>
> >> /*initialize packet type*/
> >> pk_type->type = cpu_to_be16(ETH_P_ARP);
> >> @@ -855,13 +805,13 @@ static void rlb_deinitialize(struct bonding *bond)
> >>
> >> dev_remove_pack(&(bond_info->rlb_pkt_type));
> >>
> >> - _lock_rx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> kfree(bond_info->rx_hashtbl);
> >> bond_info->rx_hashtbl = NULL;
> >> bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
> >>
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >> }
> >>
> >> static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> >> @@ -869,7 +819,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> >> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> >> u32 curr_index;
> >>
> >> - _lock_rx_hashtbl(bond);
> >> + _lock_hashtbl(bond);
> >>
> >> curr_index = bond_info->rx_hashtbl_head;
> >> while (curr_index != RLB_NULL_INDEX) {
> >> @@ -894,7 +844,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> >> curr_index = next_index;
> >> }
> >>
> >> - _unlock_rx_hashtbl(bond);
> >> + _unlock_hashtbl(bond);
> >> }
> >>
> >> /*********************** tlb/rlb shared functions *********************/
> >> @@ -1521,11 +1471,6 @@ void bond_alb_monitor(struct work_struct *work)
> >> read_lock(&bond->lock);
> >> }
> >>
> >> - if (bond_info->rlb_rebalance) {
> >> - bond_info->rlb_rebalance = 0;
> >> - rlb_rebalance(bond);
> >> - }
> >> -
> >> /* check if clients need updating */
> >> if (bond_info->rx_ntt) {
> >> if (bond_info->rlb_update_delay_counter) {
> >> diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
> >> index b65fd29..09d755a 100644
> >> --- a/drivers/net/bonding/bond_alb.h
> >> +++ b/drivers/net/bonding/bond_alb.h
> >> @@ -90,7 +90,7 @@ struct tlb_slave_info {
> >> struct alb_bond_info {
> >> struct timer_list alb_timer;
> >> struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
> >> - spinlock_t tx_hashtbl_lock;
> >> + spinlock_t hashtbl_lock; /* lock for both tables */
> >> u32 unbalanced_load;
> >> int tx_rebalance_counter;
> >> int lp_counter;
> >> @@ -98,7 +98,6 @@ struct alb_bond_info {
> >> int rlb_enabled;
> >> struct packet_type rlb_pkt_type;
> >> struct rlb_client_info *rx_hashtbl; /* Receive hash table */
> >> - spinlock_t rx_hashtbl_lock;
> >> u32 rx_hashtbl_head;
> >> u8 rx_ntt; /* flag - need to transmit
> >> * to all rx clients
> >
> >Any thoughts on this, Jay?
>
> -J
>
> ---
> -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2009-09-29 0:13 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-09-11 21:11 [PATCH 2/4] bonding: make sure tx and rx hash tables stay in sync when using alb mode Andy Gospodarek
2009-09-16 23:36 ` Jay Vosburgh
2009-09-16 23:44 ` Andy Gospodarek
2009-09-18 15:36 ` Andy Gospodarek
2009-09-18 15:56 ` [PATCH 2/4 v3] " Andy Gospodarek
2009-09-28 22:00 ` Andy Gospodarek
2009-09-28 22:09 ` Jay Vosburgh
2009-09-29 0:13 ` Andy Gospodarek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox