* [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
@ 2012-02-27 17:34 Jiri Bohac
2012-02-27 17:46 ` bonding: should rlb rx_hashtbl be reimplemented? Jiri Bohac
` (5 more replies)
0 siblings, 6 replies; 17+ messages in thread
From: Jiri Bohac @ 2012-02-27 17:34 UTC (permalink / raw)
To: Jay Vosburgh, Andy Gospodarek, netdev
Bonding in balance-alb mode records information from ARP packets
passing through the bond in a hash table (rx_hashtbl).
At certain situations (e.g. link change of a slave),
rlb_update_rx_clients() will send out ARP packets to update ARP
caches of other hosts on the network to achieve RX load balancing.
The problem is that once an IP address is recorded in the hash
table, it stays there indefinitely [1]. If this IP address is
migrated to a different host in the network, bonding still sends
out ARP packets that poison other systems' ARP caches with
invalid information.
This patch solves this by looking at all incoming ARP packets,
and checking if the source IP address is one of the source
addresses stored in the rx_hashtbl. If it is, the corresponding
hash table entries are removed. Thus, when an IP address is
migrated, the first ARP broadcast by its new owner will purge the
offending entries of rx_hashtbl.
(a simpler approach, where bonding would monitor IP address
changes on the local system does not work for setups like:
HostA --- NetworkA --- eth0-bond0-br0 --- NetworkB --- hostB
and an IP address migrating from HostB to HostA)
The hash table is hashed by ip_dst. To be able to do the above
check efficiently (not walking the whole hash table), we need a
reverse mapping (by ip_src).
I added three new members in struct rlb_client_info:
rx_hashtbl[x].reverse_first will point to the start of a list of
entries for which hash(ip_src) == x.
The list is linked with reverse_next and reverse_prev.
When an incoming ARP packet arrives at rlb_arp_recv()
rlb_purge_src_ip() can quickly walk only the entries on the
corresponding lists, i.e. the entries that are likely to contain
the offending IP address.
-----
[1]: ... unless it is replaced with another value that happens to
have the same hash -- rx_hashtbl is not a proper hash table. In
case of a collision, the old entry is replaced by the new one.
Signed-off-by: Jiri Bohac <jbohac@suse.cz>
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -84,6 +84,9 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static void rlb_purge_src_ip(struct bonding *bond, u32 ip_src);
+static void rlb_delete_table_entry_reverse(struct bonding* bond, u32 index);
+static void rlb_set_reverse_entry(struct bonding* bond, u32 ip_src_hash, u32 ip_dst_hash);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -366,6 +369,15 @@ static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
return;
}
+ /* We received an ARP from arp->ip_src.
+ * We might have used this IP address previously (on the bonding host
+ * itself or on a system that is bridged together with the bond), but
+ * now the IP has migrated elsewhere and we must prevent sending out
+ * client updates with this IP address as the source.
+ * Clean up all hash table entries that have this address as ip_src.
+ */
+ rlb_purge_src_ip(bond, arp->ip_src);
+
if (arp->op_code == htons(ARPOP_REPLY)) {
/* update rx hash table for this ARP */
rlb_update_entry_from_arp(bond, arp);
@@ -657,6 +669,13 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
assigned_slave = rlb_next_rx_slave(bond);
if (assigned_slave) {
+ if (!(client_info->assigned && client_info->ip_src == arp->ip_src)) {
+ /* ip_src is going to be updated, fix the reverse hashing */
+ u32 hash_src = _simple_hash((u8 *)&arp->ip_src, sizeof(arp->ip_src));
+ rlb_delete_table_entry_reverse(bond, hash_index);
+ rlb_set_reverse_entry(bond, hash_src, hash_index);
+ }
+
client_info->ip_src = arp->ip_src;
client_info->ip_dst = arp->ip_dst;
/* arp->mac_dst is broadcast for arp reqeusts.
@@ -769,11 +788,111 @@ static void rlb_rebalance(struct bonding *bond)
}
/* Caller must hold rx_hashtbl lock */
-static void rlb_init_table_entry(struct rlb_client_info *entry)
+static void rlb_init_table_entry_forward(struct rlb_client_info *entry)
{
- memset(entry, 0, sizeof(struct rlb_client_info));
entry->next = RLB_NULL_INDEX;
entry->prev = RLB_NULL_INDEX;
+ entry->assigned = 0;
+ entry->slave = NULL;
+ entry->tag = 0;
+}
+static void rlb_init_table_entry_reverse(struct rlb_client_info *entry)
+{
+ entry->reverse_first = RLB_NULL_INDEX;
+ entry->reverse_prev = RLB_NULL_INDEX;
+ entry->reverse_next = RLB_NULL_INDEX;
+}
+
+static void rlb_init_table_entry(struct rlb_client_info *entry)
+{
+ memset(entry, 0, sizeof(struct rlb_client_info));
+ rlb_init_table_entry_forward(entry);
+ rlb_init_table_entry_reverse(entry);
+}
+
+static void rlb_delete_table_entry_forward(struct bonding* bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx_hashtbl[index].next;
+ u32 prev_index = bond_info->rx_hashtbl[index].prev;
+
+ if (index == bond_info->rx_hashtbl_head) {
+ bond_info->rx_hashtbl_head = next_index;
+ }
+ if (prev_index != RLB_NULL_INDEX) {
+ bond_info->rx_hashtbl[prev_index].next = next_index;
+ }
+ if (next_index != RLB_NULL_INDEX) {
+ bond_info->rx_hashtbl[next_index].prev = prev_index;
+ }
+}
+
+static void rlb_delete_table_entry_reverse(struct bonding* bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx_hashtbl[index].reverse_next;
+ u32 prev_index = bond_info->rx_hashtbl[index].reverse_prev;
+
+ bond_info->rx_hashtbl[index].reverse_next = RLB_NULL_INDEX;
+ bond_info->rx_hashtbl[index].reverse_prev = RLB_NULL_INDEX;
+
+ if (next_index != RLB_NULL_INDEX) {
+ bond_info->rx_hashtbl[next_index].reverse_prev = prev_index;
+ }
+
+ if (prev_index == RLB_NULL_INDEX)
+ return;
+
+ /* is prev_index pointing to the head of this chain? */
+ if (bond_info->rx_hashtbl[prev_index].reverse_first == index)
+ bond_info->rx_hashtbl[prev_index].reverse_first = next_index;
+ else
+ bond_info->rx_hashtbl[prev_index].reverse_next = next_index;
+
+}
+
+static void rlb_delete_table_entry(struct bonding* bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
+
+ rlb_delete_table_entry_forward(bond, index);
+ rlb_init_table_entry_forward(entry);
+
+ rlb_delete_table_entry_reverse(bond, index);
+}
+
+static void rlb_set_reverse_entry(struct bonding* bond, u32 ip_src_hash, u32 ip_dst_hash)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next;
+
+ bond_info->rx_hashtbl[ip_dst_hash].reverse_prev = ip_src_hash;
+ next = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
+ bond_info->rx_hashtbl[ip_dst_hash].reverse_next = next;
+ if (next != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next].reverse_prev = ip_dst_hash;
+ bond_info->rx_hashtbl[ip_src_hash].reverse_first = ip_dst_hash;
+}
+
+/* deletes all entries with a given ip_src */
+static void rlb_purge_src_ip(struct bonding *bond, u32 ip_src)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 ip_src_hash = _simple_hash((u8*)&(ip_src), sizeof(ip_src));
+ u32 index;
+
+ _lock_rx_hashtbl_bh(bond);
+
+ index = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
+ while (index != RLB_NULL_INDEX) {
+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
+ u32 next_index = entry->reverse_next;
+ if (entry->ip_src == ip_src)
+ rlb_delete_table_entry(bond, index);
+ index = next_index;
+ }
+ _unlock_rx_hashtbl_bh(bond);
}
static int rlb_initialize(struct bonding *bond)
@@ -831,21 +950,9 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
while (curr_index != RLB_NULL_INDEX) {
struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
u32 next_index = bond_info->rx_hashtbl[curr_index].next;
- u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
- if (curr->tag && (curr->vlan_id == vlan_id)) {
- if (curr_index == bond_info->rx_hashtbl_head) {
- bond_info->rx_hashtbl_head = next_index;
- }
- if (prev_index != RLB_NULL_INDEX) {
- bond_info->rx_hashtbl[prev_index].next = next_index;
- }
- if (next_index != RLB_NULL_INDEX) {
- bond_info->rx_hashtbl[next_index].prev = prev_index;
- }
-
- rlb_init_table_entry(curr);
- }
+ if (curr->tag && (curr->vlan_id == vlan_id))
+ rlb_delete_table_entry(bond, curr_index);
curr_index = next_index;
}
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index 90f140a..f10ecf7 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -108,6 +108,9 @@ struct rlb_client_info {
struct slave *slave; /* the slave assigned to this client */
u8 tag; /* flag - need to tag skb */
unsigned short vlan_id; /* VLAN tag associated with IP address */
+ u32 reverse_next; /* next entry with same hash(ip_src) */
+ u32 reverse_prev; /* prev entry with same hash(ip_src) */
+ u32 reverse_first; /* first entry with hash(ip_src) == this entry's index */
};
struct tlb_slave_info {
--
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ
^ permalink raw reply related [flat|nested] 17+ messages in thread
* bonding: should rlb rx_hashtbl be reimplemented?
2012-02-27 17:34 [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
@ 2012-02-27 17:46 ` Jiri Bohac
2012-02-29 13:55 ` [PATCH net] bonding:update rlb entry for arp request Weiping Pan
` (4 subsequent siblings)
5 siblings, 0 replies; 17+ messages in thread
From: Jiri Bohac @ 2012-02-27 17:46 UTC (permalink / raw)
To: Jiri Bohac; +Cc: Jay Vosburgh, Andy Gospodarek, netdev
On Mon, Feb 27, 2012 at 06:34:00PM +0100, Jiri Bohac wrote:
> [1]: ... unless it is replaced with another value that happens to
> have the same hash -- rx_hashtbl is not a proper hash table. In
> case of a collision, the old entry is replaced by the new one.
Unrelated to this patch:
I believe that with the current implementation, if one needs to
communicate with two hosts with colliding IP addresses, RLB can't
work properly unless you renumber your network (and it will
generate an ARP storm as a side effect!).
Has anyone thought of replacing this strange hash table
implementation with a proper hash table, with colliding entries
in linked lists, some garbage collection when the hash grows too
large (configurable hash table size), etc?
Also, once the code is reworked, it could easily be made to work with
IPv6/ND in additiopn to IPv4/ARP.
I am willing to give this a try -- any thoughts?
--
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH net] bonding:update rlb entry for arp request
2012-02-27 17:34 [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
2012-02-27 17:46 ` bonding: should rlb rx_hashtbl be reimplemented? Jiri Bohac
@ 2012-02-29 13:55 ` Weiping Pan
2012-02-29 18:26 ` Jiri Bohac
2012-02-29 13:58 ` [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table WeipingPan
` (3 subsequent siblings)
5 siblings, 1 reply; 17+ messages in thread
From: Weiping Pan @ 2012-02-29 13:55 UTC (permalink / raw)
To: netdev; +Cc: jbohac, fubar, andy, Weiping Pan
rlb_arp_recv() only handles arp reply packets,
but I think arp request packets contain the latest information about
clients(ip and mac), so we should update rlb entry for arp request.
This patch can resolve a problem that if an IP address is migrated to a
different host in the network, the corresponding rlb entry still contains the
old mac address for this IP, and bonding will send out invalid ARP packets
that will poison other systems' ARP caches.
Jiri Bohac <jbohac@suse.cz> found this problem and posted a patch,
but I don't know whether this patch can fix his problem.
Signed-off-by: Weiping Pan <panweiping3@gmail.com>
---
drivers/net/bonding/bond_alb.c | 9 +++------
1 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index f820b26..fe881a9 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -332,7 +332,6 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
client_info = &(bond_info->rx_hashtbl[hash_index]);
if ((client_info->assigned) &&
- (client_info->ip_src == arp->ip_dst) &&
(client_info->ip_dst == arp->ip_src) &&
(compare_ether_addr_64bits(client_info->mac_dst, arp->mac_src))) {
/* update the clients MAC address */
@@ -366,11 +365,9 @@ static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
return;
}
- if (arp->op_code == htons(ARPOP_REPLY)) {
- /* update rx hash table for this ARP */
- rlb_update_entry_from_arp(bond, arp);
- pr_debug("Server received an ARP Reply from client\n");
- }
+ /* update rx hash table for this ARP */
+ rlb_update_entry_from_arp(bond, arp);
+ pr_debug("Server received an ARP Request/Reply from client\n");
}
/* Caller must hold bond lock for read */
--
1.7.4.4
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-02-27 17:34 [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
2012-02-27 17:46 ` bonding: should rlb rx_hashtbl be reimplemented? Jiri Bohac
2012-02-29 13:55 ` [PATCH net] bonding:update rlb entry for arp request Weiping Pan
@ 2012-02-29 13:58 ` WeipingPan
2012-02-29 18:49 ` Jiri Bohac
2012-02-29 18:59 ` Jay Vosburgh
` (2 subsequent siblings)
5 siblings, 1 reply; 17+ messages in thread
From: WeipingPan @ 2012-02-29 13:58 UTC (permalink / raw)
To: Jiri Bohac; +Cc: Jay Vosburgh, Andy Gospodarek, netdev
On 02/28/2012 01:34 AM, Jiri Bohac wrote:
> Bonding in balance-alb mode records information from ARP packets
> passing through the bond in a hash table (rx_hashtbl).
>
> At certain situations (e.g. link change of a slave),
> rlb_update_rx_clients() will send out ARP packets to update ARP
> caches of other hosts on the network to achieve RX load balancing.
>
> The problem is that once an IP address is recorded in the hash
> table, it stays there indefinitely [1]. If this IP address is
> migrated to a different host in the network, bonding still sends
> out ARP packets that poison other systems' ARP caches with
> invalid information.
I met this problem, too.
> This patch solves this by looking at all incoming ARP packets,
> and checking if the source IP address is one of the source
> addresses stored in the rx_hashtbl. If it is, the corresponding
> hash table entries are removed. Thus, when an IP address is
> migrated, the first ARP broadcast by its new owner will purge the
> offending entries of rx_hashtbl.
>
> (a simpler approach, where bonding would monitor IP address
> changes on the local system does not work for setups like:
> HostA --- NetworkA --- eth0-bond0-br0 --- NetworkB --- hostB
> and an IP address migrating from HostB to HostA)
Could you explain in detail or step by step how to reproduce your problem ?
I made a patch but I do not know whether it can fix your problem.
thanks
Weiping Pan
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH net] bonding:update rlb entry for arp request
2012-02-29 13:55 ` [PATCH net] bonding:update rlb entry for arp request Weiping Pan
@ 2012-02-29 18:26 ` Jiri Bohac
0 siblings, 0 replies; 17+ messages in thread
From: Jiri Bohac @ 2012-02-29 18:26 UTC (permalink / raw)
To: Weiping Pan; +Cc: netdev, jbohac, fubar, andy
On Wed, Feb 29, 2012 at 09:55:36PM +0800, Weiping Pan wrote:
> rlb_arp_recv() only handles arp reply packets,
> but I think arp request packets contain the latest information about
> clients(ip and mac), so we should update rlb entry for arp request.
when an ARP request arrives, a reply will be generated by the ARP
protocol and that is going to be intercepted by rlb_arp_xmit().
When we start the "connection" by sending an ARP request,
again, this is intercepted by rlb_arp_xmit(). But at that point
we don't know the client's MAC address - that's why we handle the
ARP reply in rlb_arp_recv().
> This patch can resolve a problem that if an IP address is migrated to a
> different host in the network
No, it can't - it in no way removes entries from the hash table.
And these entries that list an IP address as ours while it is no
longer ours cause the problem.
> the corresponding rlb entry still contains the
> old mac address for this IP, and bonding will send out invalid ARP packets
> that will poison other systems' ARP caches.
yes, but it poisons the caches with an invalid
client_info->ip_src (IP) + client_info->slave->dev->dev_addr (MAC)
combination.
rlb_update_entry_from_arp() updates the _client_ MAC address,
i.e. client_info->mac_dst.
--
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-02-29 13:58 ` [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table WeipingPan
@ 2012-02-29 18:49 ` Jiri Bohac
0 siblings, 0 replies; 17+ messages in thread
From: Jiri Bohac @ 2012-02-29 18:49 UTC (permalink / raw)
To: WeipingPan; +Cc: Jiri Bohac, Jay Vosburgh, Andy Gospodarek, netdev
On Wed, Feb 29, 2012 at 09:58:53PM +0800, WeipingPan wrote:
> On 02/28/2012 01:34 AM, Jiri Bohac wrote:
> >Bonding in balance-alb mode records information from ARP packets
> >passing through the bond in a hash table (rx_hashtbl).
> >
> >At certain situations (e.g. link change of a slave),
> >rlb_update_rx_clients() will send out ARP packets to update ARP
> >caches of other hosts on the network to achieve RX load balancing.
> >
> >The problem is that once an IP address is recorded in the hash
> >table, it stays there indefinitely [1]. If this IP address is
> >migrated to a different host in the network, bonding still sends
> >out ARP packets that poison other systems' ARP caches with
> >invalid information.
> I met this problem, too.
>
> >This patch solves this by looking at all incoming ARP packets,
> >and checking if the source IP address is one of the source
> >addresses stored in the rx_hashtbl. If it is, the corresponding
> >hash table entries are removed. Thus, when an IP address is
> >migrated, the first ARP broadcast by its new owner will purge the
> >offending entries of rx_hashtbl.
> >
> > (a simpler approach, where bonding would monitor IP address
> > changes on the local system does not work for setups like:
> > HostA --- NetworkA --- eth0-bond0-br0 --- NetworkB --- hostB
> > and an IP address migrating from HostB to HostA)
> Could you explain in detail or step by step how to reproduce your problem ?
> I made a patch but I do not know whether it can fix your problem.
> > HostA --- NetworkA --- eth0-bond0-br0 --- NetworkB --- hostB
the simplest setup is something like:
---- HostC ------
HostA ----- switch ---|-- eth0 - bond0 |
HostB -----/ \--|-- eth1 -/ |
-----------------
HostA has IP address A, HostB has IP address B, HostC has IP
address C).
Set link of eth1 down.
Ping from C to A. This will create the rlb hash table entry with
ip_src = C and ip_dst = A and tx_slave = eth0.
Delete IP address C from bond0.
Assign IP address C to HostB
Set link of eth1 up.
Set link of eth0 down.
This should cause the hash table to be walked and all clients
(HostA included) "updated" win a unicast ARP REPLY sent by
rlb_update_client(). The reply will claim that IP address C has
the MAC address of eth1.
If at this point HostA communicated with HostB using IP address
C, communication will be broken, because the ARP cache of Host A
will be poisoned.
My patch would delete the bad hash table entry when
it sees an ARP packet from Host B from IP address C.
Now, this packet is not guaranteed to be broadcast
(imagine HostA sending a broadcast ARP request for IP address C
and HostB replying with an unicast ARP reply.)
But setups that migrate IP addresses from host to hosts often use
gratuitous ARPs to update ARP caches after the IP address
migration and that would make sure the hash table entry is
deleted.
--
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-02-27 17:34 [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
` (2 preceding siblings ...)
2012-02-29 13:58 ` [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table WeipingPan
@ 2012-02-29 18:59 ` Jay Vosburgh
2012-03-01 2:12 ` Jay Vosburgh
2012-03-23 7:10 ` WeipingPan
5 siblings, 0 replies; 17+ messages in thread
From: Jay Vosburgh @ 2012-02-29 18:59 UTC (permalink / raw)
To: Jiri Bohac; +Cc: Andy Gospodarek, netdev
Jiri Bohac <jbohac@suse.cz> wrote:
>Bonding in balance-alb mode records information from ARP packets
>passing through the bond in a hash table (rx_hashtbl).
>
>At certain situations (e.g. link change of a slave),
>rlb_update_rx_clients() will send out ARP packets to update ARP
>caches of other hosts on the network to achieve RX load balancing.
>
>The problem is that once an IP address is recorded in the hash
>table, it stays there indefinitely [1]. If this IP address is
>migrated to a different host in the network, bonding still sends
>out ARP packets that poison other systems' ARP caches with
>invalid information.
>
>This patch solves this by looking at all incoming ARP packets,
>and checking if the source IP address is one of the source
>addresses stored in the rx_hashtbl. If it is, the corresponding
>hash table entries are removed. Thus, when an IP address is
>migrated, the first ARP broadcast by its new owner will purge the
>offending entries of rx_hashtbl.
>
> (a simpler approach, where bonding would monitor IP address
> changes on the local system does not work for setups like:
> HostA --- NetworkA --- eth0-bond0-br0 --- NetworkB --- hostB
> and an IP address migrating from HostB to HostA)
>
>The hash table is hashed by ip_dst. To be able to do the above
>check efficiently (not walking the whole hash table), we need a
>reverse mapping (by ip_src).
>
>I added three new members in struct rlb_client_info:
> rx_hashtbl[x].reverse_first will point to the start of a list of
> entries for which hash(ip_src) == x.
> The list is linked with reverse_next and reverse_prev.
>
>When an incoming ARP packet arrives at rlb_arp_recv()
>rlb_purge_src_ip() can quickly walk only the entries on the
>corresponding lists, i.e. the entries that are likely to contain
>the offending IP address.
Just a note that I'm going to set this up and test it out today.
I think I understand what's going on and how the fix works, but I need
to see it in action to make sure I'm understanding correctly.
I would also point out that there are a few style issues in the
patch, e.g., several occurances of
if (something) {
one_line_here_doesnt_need_braces();
}
I know there is some of this in the original code, but if you're
refactoring that anyway, I don't think it's a problem to correct the
style. There are also a few cases of pointers referenced as "struct
bonding* bond" instead of "struct bonding *bond".
Also, I'm not too sure about the "reverse" nomenclature; it's
not really reverse in the ordering sense, it's just another hashing
against a different key. Perhaps calling the original "dst" and the new
hash "src" or the like would be clearer. It might also be possible to
combine the next, prev and first fields into a structure so that the
add, remove, etc, functions could use common code instead two almost (?)
identical functions that simply reference different field names.
This is a separate discussion from whether or not the not-quite
hash table here should be replaced entirely; perhaps replacement with a
normal hash table that keys on both the source and destination IP
addresses would work better overall as a replacement. In that case,
destination IPs might have multiple entries if there are multiple local
source IP addresses connecting to a single remote IP destination, but
that seems like an uncommon case. This might also permit unifying the
RX and TX tables.
The extant hash-like table does have the advantage that no
allocations are required to add or remove entries, which I'd guess is
why it was implemented the way it was.
-J
>-----
>[1]: ... unless it is replaced with another value that happens to
>have the same hash -- rx_hashtbl is not a proper hash table. In
>case of a collision, the old entry is replaced by the new one.
>
>Signed-off-by: Jiri Bohac <jbohac@suse.cz>
>
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -84,6 +84,9 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
>
> /* Forward declaration */
> static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
>+static void rlb_purge_src_ip(struct bonding *bond, u32 ip_src);
>+static void rlb_delete_table_entry_reverse(struct bonding* bond, u32 index);
>+static void rlb_set_reverse_entry(struct bonding* bond, u32 ip_src_hash, u32 ip_dst_hash);
>
> static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
> {
>@@ -366,6 +369,15 @@ static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
> return;
> }
>
>+ /* We received an ARP from arp->ip_src.
>+ * We might have used this IP address previously (on the bonding host
>+ * itself or on a system that is bridged together with the bond), but
>+ * now the IP has migrated elsewhere and we must prevent sending out
>+ * client updates with this IP address as the source.
>+ * Clean up all hash table entries that have this address as ip_src.
>+ */
>+ rlb_purge_src_ip(bond, arp->ip_src);
>+
> if (arp->op_code == htons(ARPOP_REPLY)) {
> /* update rx hash table for this ARP */
> rlb_update_entry_from_arp(bond, arp);
>@@ -657,6 +669,13 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> assigned_slave = rlb_next_rx_slave(bond);
>
> if (assigned_slave) {
>+ if (!(client_info->assigned && client_info->ip_src == arp->ip_src)) {
>+ /* ip_src is going to be updated, fix the reverse hashing */
>+ u32 hash_src = _simple_hash((u8 *)&arp->ip_src, sizeof(arp->ip_src));
>+ rlb_delete_table_entry_reverse(bond, hash_index);
>+ rlb_set_reverse_entry(bond, hash_src, hash_index);
>+ }
>+
> client_info->ip_src = arp->ip_src;
> client_info->ip_dst = arp->ip_dst;
> /* arp->mac_dst is broadcast for arp reqeusts.
>@@ -769,11 +788,111 @@ static void rlb_rebalance(struct bonding *bond)
> }
>
> /* Caller must hold rx_hashtbl lock */
>-static void rlb_init_table_entry(struct rlb_client_info *entry)
>+static void rlb_init_table_entry_forward(struct rlb_client_info *entry)
> {
>- memset(entry, 0, sizeof(struct rlb_client_info));
> entry->next = RLB_NULL_INDEX;
> entry->prev = RLB_NULL_INDEX;
>+ entry->assigned = 0;
>+ entry->slave = NULL;
>+ entry->tag = 0;
>+}
>+static void rlb_init_table_entry_reverse(struct rlb_client_info *entry)
>+{
>+ entry->reverse_first = RLB_NULL_INDEX;
>+ entry->reverse_prev = RLB_NULL_INDEX;
>+ entry->reverse_next = RLB_NULL_INDEX;
>+}
>+
>+static void rlb_init_table_entry(struct rlb_client_info *entry)
>+{
>+ memset(entry, 0, sizeof(struct rlb_client_info));
>+ rlb_init_table_entry_forward(entry);
>+ rlb_init_table_entry_reverse(entry);
>+}
>+
>+static void rlb_delete_table_entry_forward(struct bonding* bond, u32 index)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 next_index = bond_info->rx_hashtbl[index].next;
>+ u32 prev_index = bond_info->rx_hashtbl[index].prev;
>+
>+ if (index == bond_info->rx_hashtbl_head) {
>+ bond_info->rx_hashtbl_head = next_index;
>+ }
>+ if (prev_index != RLB_NULL_INDEX) {
>+ bond_info->rx_hashtbl[prev_index].next = next_index;
>+ }
>+ if (next_index != RLB_NULL_INDEX) {
>+ bond_info->rx_hashtbl[next_index].prev = prev_index;
>+ }
>+}
>+
>+static void rlb_delete_table_entry_reverse(struct bonding* bond, u32 index)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 next_index = bond_info->rx_hashtbl[index].reverse_next;
>+ u32 prev_index = bond_info->rx_hashtbl[index].reverse_prev;
>+
>+ bond_info->rx_hashtbl[index].reverse_next = RLB_NULL_INDEX;
>+ bond_info->rx_hashtbl[index].reverse_prev = RLB_NULL_INDEX;
>+
>+ if (next_index != RLB_NULL_INDEX) {
>+ bond_info->rx_hashtbl[next_index].reverse_prev = prev_index;
>+ }
>+
>+ if (prev_index == RLB_NULL_INDEX)
>+ return;
>+
>+ /* is prev_index pointing to the head of this chain? */
>+ if (bond_info->rx_hashtbl[prev_index].reverse_first == index)
>+ bond_info->rx_hashtbl[prev_index].reverse_first = next_index;
>+ else
>+ bond_info->rx_hashtbl[prev_index].reverse_next = next_index;
>+
>+}
>+
>+static void rlb_delete_table_entry(struct bonding* bond, u32 index)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
>+
>+ rlb_delete_table_entry_forward(bond, index);
>+ rlb_init_table_entry_forward(entry);
>+
>+ rlb_delete_table_entry_reverse(bond, index);
>+}
>+
>+static void rlb_set_reverse_entry(struct bonding* bond, u32 ip_src_hash, u32 ip_dst_hash)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 next;
>+
>+ bond_info->rx_hashtbl[ip_dst_hash].reverse_prev = ip_src_hash;
>+ next = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
>+ bond_info->rx_hashtbl[ip_dst_hash].reverse_next = next;
>+ if (next != RLB_NULL_INDEX)
>+ bond_info->rx_hashtbl[next].reverse_prev = ip_dst_hash;
>+ bond_info->rx_hashtbl[ip_src_hash].reverse_first = ip_dst_hash;
>+}
>+
>+/* deletes all entries with a given ip_src */
>+static void rlb_purge_src_ip(struct bonding *bond, u32 ip_src)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 ip_src_hash = _simple_hash((u8*)&(ip_src), sizeof(ip_src));
>+ u32 index;
>+
>+ _lock_rx_hashtbl_bh(bond);
>+
>+ index = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
>+ while (index != RLB_NULL_INDEX) {
>+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
>+ u32 next_index = entry->reverse_next;
>+ if (entry->ip_src == ip_src)
>+ rlb_delete_table_entry(bond, index);
>+ index = next_index;
>+ }
>+ _unlock_rx_hashtbl_bh(bond);
> }
>
> static int rlb_initialize(struct bonding *bond)
>@@ -831,21 +950,9 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> while (curr_index != RLB_NULL_INDEX) {
> struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
> u32 next_index = bond_info->rx_hashtbl[curr_index].next;
>- u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
>
>- if (curr->tag && (curr->vlan_id == vlan_id)) {
>- if (curr_index == bond_info->rx_hashtbl_head) {
>- bond_info->rx_hashtbl_head = next_index;
>- }
>- if (prev_index != RLB_NULL_INDEX) {
>- bond_info->rx_hashtbl[prev_index].next = next_index;
>- }
>- if (next_index != RLB_NULL_INDEX) {
>- bond_info->rx_hashtbl[next_index].prev = prev_index;
>- }
>-
>- rlb_init_table_entry(curr);
>- }
>+ if (curr->tag && (curr->vlan_id == vlan_id))
>+ rlb_delete_table_entry(bond, curr_index);
>
> curr_index = next_index;
> }
>diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>index 90f140a..f10ecf7 100644
>--- a/drivers/net/bonding/bond_alb.h
>+++ b/drivers/net/bonding/bond_alb.h
>@@ -108,6 +108,9 @@ struct rlb_client_info {
> struct slave *slave; /* the slave assigned to this client */
> u8 tag; /* flag - need to tag skb */
> unsigned short vlan_id; /* VLAN tag associated with IP address */
>+ u32 reverse_next; /* next entry with same hash(ip_src) */
>+ u32 reverse_prev; /* prev entry with same hash(ip_src) */
>+ u32 reverse_first; /* first entry with hash(ip_src) == this entry's index */
> };
>
> struct tlb_slave_info {
>
>--
>Jiri Bohac <jbohac@suse.cz>
>SUSE Labs, SUSE CZ
>
---
-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-02-27 17:34 [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
` (3 preceding siblings ...)
2012-02-29 18:59 ` Jay Vosburgh
@ 2012-03-01 2:12 ` Jay Vosburgh
2012-03-01 3:58 ` WeipingPan
2012-03-07 16:02 ` [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
2012-03-23 7:10 ` WeipingPan
5 siblings, 2 replies; 17+ messages in thread
From: Jay Vosburgh @ 2012-03-01 2:12 UTC (permalink / raw)
To: Jiri Bohac; +Cc: Andy Gospodarek, netdev
Jiri Bohac <jbohac@suse.cz> wrote:
>Bonding in balance-alb mode records information from ARP packets
>passing through the bond in a hash table (rx_hashtbl).
>
>At certain situations (e.g. link change of a slave),
>rlb_update_rx_clients() will send out ARP packets to update ARP
>caches of other hosts on the network to achieve RX load balancing.
>
>The problem is that once an IP address is recorded in the hash
>table, it stays there indefinitely [1]. If this IP address is
>migrated to a different host in the network, bonding still sends
>out ARP packets that poison other systems' ARP caches with
>invalid information.
>
>This patch solves this by looking at all incoming ARP packets,
>and checking if the source IP address is one of the source
>addresses stored in the rx_hashtbl. If it is, the corresponding
>hash table entries are removed. Thus, when an IP address is
>migrated, the first ARP broadcast by its new owner will purge the
>offending entries of rx_hashtbl.
>
> (a simpler approach, where bonding would monitor IP address
> changes on the local system does not work for setups like:
> HostA --- NetworkA --- eth0-bond0-br0 --- NetworkB --- hostB
> and an IP address migrating from HostB to HostA)
>
>The hash table is hashed by ip_dst. To be able to do the above
>check efficiently (not walking the whole hash table), we need a
>reverse mapping (by ip_src).
>
>I added three new members in struct rlb_client_info:
> rx_hashtbl[x].reverse_first will point to the start of a list of
> entries for which hash(ip_src) == x.
> The list is linked with reverse_next and reverse_prev.
>
>When an incoming ARP packet arrives at rlb_arp_recv()
>rlb_purge_src_ip() can quickly walk only the entries on the
>corresponding lists, i.e. the entries that are likely to contain
>the offending IP address.
I've done some initial testing with this, and so far I'm seeing
one problem: every time the local host (with bonding) sends a broadcast
ARP, that ends up flushing the entire RLB table. Well, all entries that
match the IP on the bond that's sending an ARP request, which is just
one address in my testing.
Anyway, this happens because the switch forwards the broadcast
ARP back around to one of the other bond slaves, and then that
"incoming" ARP bears an ip_src of our already-in-use IP address, and
that matches everything in the table.
So, yes, this does work to flush entries out of the table using
a particular IP source address, but as a practical matter the false
positives will vastly outnumber the valid hits. Yes, the table will
repopulate itself, but that does have some cost if, e.g., flows bounce
around from one interface to another. How often this happens would be a
function of how often the bond has to ARP for an unknown address (i.e.,
send a broadcast ARP).
Perhaps a check that the ip_src being flushed is not actually in
use locally is warranted?
-J
>-----
>[1]: ... unless it is replaced with another value that happens to
>have the same hash -- rx_hashtbl is not a proper hash table. In
>case of a collision, the old entry is replaced by the new one.
>
>Signed-off-by: Jiri Bohac <jbohac@suse.cz>
>
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -84,6 +84,9 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
>
> /* Forward declaration */
> static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
>+static void rlb_purge_src_ip(struct bonding *bond, u32 ip_src);
>+static void rlb_delete_table_entry_reverse(struct bonding* bond, u32 index);
>+static void rlb_set_reverse_entry(struct bonding* bond, u32 ip_src_hash, u32 ip_dst_hash);
>
> static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
> {
>@@ -366,6 +369,15 @@ static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
> return;
> }
>
>+ /* We received an ARP from arp->ip_src.
>+ * We might have used this IP address previously (on the bonding host
>+ * itself or on a system that is bridged together with the bond), but
>+ * now the IP has migrated elsewhere and we must prevent sending out
>+ * client updates with this IP address as the source.
>+ * Clean up all hash table entries that have this address as ip_src.
>+ */
>+ rlb_purge_src_ip(bond, arp->ip_src);
>+
> if (arp->op_code == htons(ARPOP_REPLY)) {
> /* update rx hash table for this ARP */
> rlb_update_entry_from_arp(bond, arp);
>@@ -657,6 +669,13 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> assigned_slave = rlb_next_rx_slave(bond);
>
> if (assigned_slave) {
>+ if (!(client_info->assigned && client_info->ip_src == arp->ip_src)) {
>+ /* ip_src is going to be updated, fix the reverse hashing */
>+ u32 hash_src = _simple_hash((u8 *)&arp->ip_src, sizeof(arp->ip_src));
>+ rlb_delete_table_entry_reverse(bond, hash_index);
>+ rlb_set_reverse_entry(bond, hash_src, hash_index);
>+ }
>+
> client_info->ip_src = arp->ip_src;
> client_info->ip_dst = arp->ip_dst;
> /* arp->mac_dst is broadcast for arp reqeusts.
>@@ -769,11 +788,111 @@ static void rlb_rebalance(struct bonding *bond)
> }
>
> /* Caller must hold rx_hashtbl lock */
>-static void rlb_init_table_entry(struct rlb_client_info *entry)
>+static void rlb_init_table_entry_forward(struct rlb_client_info *entry)
> {
>- memset(entry, 0, sizeof(struct rlb_client_info));
> entry->next = RLB_NULL_INDEX;
> entry->prev = RLB_NULL_INDEX;
>+ entry->assigned = 0;
>+ entry->slave = NULL;
>+ entry->tag = 0;
>+}
>+static void rlb_init_table_entry_reverse(struct rlb_client_info *entry)
>+{
>+ entry->reverse_first = RLB_NULL_INDEX;
>+ entry->reverse_prev = RLB_NULL_INDEX;
>+ entry->reverse_next = RLB_NULL_INDEX;
>+}
>+
>+static void rlb_init_table_entry(struct rlb_client_info *entry)
>+{
>+ memset(entry, 0, sizeof(struct rlb_client_info));
>+ rlb_init_table_entry_forward(entry);
>+ rlb_init_table_entry_reverse(entry);
>+}
>+
>+static void rlb_delete_table_entry_forward(struct bonding* bond, u32 index)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 next_index = bond_info->rx_hashtbl[index].next;
>+ u32 prev_index = bond_info->rx_hashtbl[index].prev;
>+
>+ if (index == bond_info->rx_hashtbl_head) {
>+ bond_info->rx_hashtbl_head = next_index;
>+ }
>+ if (prev_index != RLB_NULL_INDEX) {
>+ bond_info->rx_hashtbl[prev_index].next = next_index;
>+ }
>+ if (next_index != RLB_NULL_INDEX) {
>+ bond_info->rx_hashtbl[next_index].prev = prev_index;
>+ }
>+}
>+
>+static void rlb_delete_table_entry_reverse(struct bonding* bond, u32 index)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 next_index = bond_info->rx_hashtbl[index].reverse_next;
>+ u32 prev_index = bond_info->rx_hashtbl[index].reverse_prev;
>+
>+ bond_info->rx_hashtbl[index].reverse_next = RLB_NULL_INDEX;
>+ bond_info->rx_hashtbl[index].reverse_prev = RLB_NULL_INDEX;
>+
>+ if (next_index != RLB_NULL_INDEX) {
>+ bond_info->rx_hashtbl[next_index].reverse_prev = prev_index;
>+ }
>+
>+ if (prev_index == RLB_NULL_INDEX)
>+ return;
>+
>+ /* is prev_index pointing to the head of this chain? */
>+ if (bond_info->rx_hashtbl[prev_index].reverse_first == index)
>+ bond_info->rx_hashtbl[prev_index].reverse_first = next_index;
>+ else
>+ bond_info->rx_hashtbl[prev_index].reverse_next = next_index;
>+
>+}
>+
>+static void rlb_delete_table_entry(struct bonding* bond, u32 index)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
>+
>+ rlb_delete_table_entry_forward(bond, index);
>+ rlb_init_table_entry_forward(entry);
>+
>+ rlb_delete_table_entry_reverse(bond, index);
>+}
>+
>+static void rlb_set_reverse_entry(struct bonding* bond, u32 ip_src_hash, u32 ip_dst_hash)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 next;
>+
>+ bond_info->rx_hashtbl[ip_dst_hash].reverse_prev = ip_src_hash;
>+ next = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
>+ bond_info->rx_hashtbl[ip_dst_hash].reverse_next = next;
>+ if (next != RLB_NULL_INDEX)
>+ bond_info->rx_hashtbl[next].reverse_prev = ip_dst_hash;
>+ bond_info->rx_hashtbl[ip_src_hash].reverse_first = ip_dst_hash;
>+}
>+
>+/* deletes all entries with a given ip_src */
>+static void rlb_purge_src_ip(struct bonding *bond, u32 ip_src)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 ip_src_hash = _simple_hash((u8*)&(ip_src), sizeof(ip_src));
>+ u32 index;
>+
>+ _lock_rx_hashtbl_bh(bond);
>+
>+ index = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
>+ while (index != RLB_NULL_INDEX) {
>+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
>+ u32 next_index = entry->reverse_next;
>+ if (entry->ip_src == ip_src)
>+ rlb_delete_table_entry(bond, index);
>+ index = next_index;
>+ }
>+ _unlock_rx_hashtbl_bh(bond);
> }
>
> static int rlb_initialize(struct bonding *bond)
>@@ -831,21 +950,9 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> while (curr_index != RLB_NULL_INDEX) {
> struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
> u32 next_index = bond_info->rx_hashtbl[curr_index].next;
>- u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
>
>- if (curr->tag && (curr->vlan_id == vlan_id)) {
>- if (curr_index == bond_info->rx_hashtbl_head) {
>- bond_info->rx_hashtbl_head = next_index;
>- }
>- if (prev_index != RLB_NULL_INDEX) {
>- bond_info->rx_hashtbl[prev_index].next = next_index;
>- }
>- if (next_index != RLB_NULL_INDEX) {
>- bond_info->rx_hashtbl[next_index].prev = prev_index;
>- }
>-
>- rlb_init_table_entry(curr);
>- }
>+ if (curr->tag && (curr->vlan_id == vlan_id))
>+ rlb_delete_table_entry(bond, curr_index);
>
> curr_index = next_index;
> }
>diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>index 90f140a..f10ecf7 100644
>--- a/drivers/net/bonding/bond_alb.h
>+++ b/drivers/net/bonding/bond_alb.h
>@@ -108,6 +108,9 @@ struct rlb_client_info {
> struct slave *slave; /* the slave assigned to this client */
> u8 tag; /* flag - need to tag skb */
> unsigned short vlan_id; /* VLAN tag associated with IP address */
>+ u32 reverse_next; /* next entry with same hash(ip_src) */
>+ u32 reverse_prev; /* prev entry with same hash(ip_src) */
>+ u32 reverse_first; /* first entry with hash(ip_src) == this entry's index */
> };
>
> struct tlb_slave_info {
>
>--
>Jiri Bohac <jbohac@suse.cz>
>SUSE Labs, SUSE CZ
>
---
-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-03-01 2:12 ` Jay Vosburgh
@ 2012-03-01 3:58 ` WeipingPan
2012-03-01 4:01 ` [PATCH net] delete rlb entry if ip of bonding is deleted Weiping Pan
2012-03-07 16:02 ` [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
1 sibling, 1 reply; 17+ messages in thread
From: WeipingPan @ 2012-03-01 3:58 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: Jiri Bohac, Andy Gospodarek, netdev
On 03/01/2012 10:12 AM, Jay Vosburgh wrote:
> Jiri Bohac<jbohac@suse.cz> wrote:
>
>> Bonding in balance-alb mode records information from ARP packets
>> passing through the bond in a hash table (rx_hashtbl).
>>
>> At certain situations (e.g. link change of a slave),
>> rlb_update_rx_clients() will send out ARP packets to update ARP
>> caches of other hosts on the network to achieve RX load balancing.
>>
>> The problem is that once an IP address is recorded in the hash
>> table, it stays there indefinitely [1]. If this IP address is
>> migrated to a different host in the network, bonding still sends
>> out ARP packets that poison other systems' ARP caches with
>> invalid information.
>>
>> This patch solves this by looking at all incoming ARP packets,
>> and checking if the source IP address is one of the source
>> addresses stored in the rx_hashtbl. If it is, the corresponding
>> hash table entries are removed. Thus, when an IP address is
>> migrated, the first ARP broadcast by its new owner will purge the
>> offending entries of rx_hashtbl.
>>
>> (a simpler approach, where bonding would monitor IP address
>> changes on the local system does not work for setups like:
>> HostA --- NetworkA --- eth0-bond0-br0 --- NetworkB --- hostB
>> and an IP address migrating from HostB to HostA)
>>
>> The hash table is hashed by ip_dst. To be able to do the above
>> check efficiently (not walking the whole hash table), we need a
>> reverse mapping (by ip_src).
>>
>> I added three new members in struct rlb_client_info:
>> rx_hashtbl[x].reverse_first will point to the start of a list of
>> entries for which hash(ip_src) == x.
>> The list is linked with reverse_next and reverse_prev.
>>
>> When an incoming ARP packet arrives at rlb_arp_recv()
>> rlb_purge_src_ip() can quickly walk only the entries on the
>> corresponding lists, i.e. the entries that are likely to contain
>> the offending IP address.
> I've done some initial testing with this, and so far I'm seeing
> one problem: every time the local host (with bonding) sends a broadcast
> ARP, that ends up flushing the entire RLB table. Well, all entries that
> match the IP on the bond that's sending an ARP request, which is just
> one address in my testing.
>
> Anyway, this happens because the switch forwards the broadcast
> ARP back around to one of the other bond slaves, and then that
> "incoming" ARP bears an ip_src of our already-in-use IP address, and
> that matches everything in the table.
>
> So, yes, this does work to flush entries out of the table using
> a particular IP source address, but as a practical matter the false
> positives will vastly outnumber the valid hits. Yes, the table will
> repopulate itself, but that does have some cost if, e.g., flows bounce
> around from one interface to another. How often this happens would be a
> function of how often the bond has to ARP for an unknown address (i.e.,
> send a broadcast ARP).
>
> Perhaps a check that the ip_src being flushed is not actually in
> use locally is warranted?
>
> -J
How about just deleting these rlb entries when we receive the events
that the ip address is deleted ?
Then we need not to montor all arp requests.
Patch is to be followed.
thanks
Weiping Pan
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH net] delete rlb entry if ip of bonding is deleted
2012-03-01 3:58 ` WeipingPan
@ 2012-03-01 4:01 ` Weiping Pan
2012-03-01 4:19 ` Jay Vosburgh
0 siblings, 1 reply; 17+ messages in thread
From: Weiping Pan @ 2012-03-01 4:01 UTC (permalink / raw)
To: netdev; +Cc: jbohac, fubar, andy, Weiping Pan
When the ip of bonding is deleted, its rlb table still contains old invalid
mappings, just delete them to avoid poisoning other clients arp cache.
Signed-off-by: Weiping Pan <panweiping3@gmail.com>
---
drivers/net/bonding/bond_alb.c | 35 +++++++++++++++++++++++++++++++++++
drivers/net/bonding/bond_alb.h | 2 ++
drivers/net/bonding/bond_main.c | 1 +
3 files changed, 38 insertions(+), 0 deletions(-)
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index f820b26..928c636 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -853,6 +853,41 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
_unlock_rx_hashtbl_bh(bond);
}
+/* delete all rlb entries which has ip as ip_src */
+void bond_alb_delete_entry(struct bonding *bond, __be32 ip)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 curr_index;
+
+ _lock_rx_hashtbl_bh(bond);
+
+ curr_index = bond_info->rx_hashtbl_head;
+ while (curr_index != RLB_NULL_INDEX) {
+ struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
+ u32 next_index = bond_info->rx_hashtbl[curr_index].next;
+ u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
+ if (curr->assigned && (curr->ip_src == ip)) {
+ if (curr_index == bond_info->rx_hashtbl_head) {
+ bond_info->rx_hashtbl_head = next_index;
+ }
+
+ if (prev_index != RLB_NULL_INDEX) {
+ bond_info->rx_hashtbl[prev_index].next = next_index;
+ }
+
+ if (next_index != RLB_NULL_INDEX) {
+ bond_info->rx_hashtbl[next_index].prev = prev_index;
+ }
+
+ rlb_init_table_entry(curr);
+ }
+
+ curr_index = next_index;
+ }
+
+ _unlock_rx_hashtbl_bh(bond);
+}
+
/*********************** tlb/rlb shared functions *********************/
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index 90f140a..38863fc 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -163,5 +163,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
void bond_alb_monitor(struct work_struct *);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
+
+void bond_alb_delete_entry(struct bonding *bond, __be32 ip);
#endif /* __BOND_ALB_H__ */
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 435984a..ec071b9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3315,6 +3315,7 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event,
return NOTIFY_OK;
case NETDEV_DOWN:
bond->master_ip = 0;
+ bond_alb_delete_entry(bond, ifa->ifa_local);
return NOTIFY_OK;
default:
return NOTIFY_DONE;
--
1.7.4.4
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH net] delete rlb entry if ip of bonding is deleted
2012-03-01 4:01 ` [PATCH net] delete rlb entry if ip of bonding is deleted Weiping Pan
@ 2012-03-01 4:19 ` Jay Vosburgh
0 siblings, 0 replies; 17+ messages in thread
From: Jay Vosburgh @ 2012-03-01 4:19 UTC (permalink / raw)
To: Weiping Pan; +Cc: netdev, jbohac, andy
Weiping Pan <panweiping3@gmail.com> wrote:
>When the ip of bonding is deleted, its rlb table still contains old invalid
>mappings, just delete them to avoid poisoning other clients arp cache.
I don't believe this patch will handle the case that Jiri
mentioned earlier: the bond host is acting as a router or bridge. The
key here is that the "source address" in question is not assigned to the
bond at all, but comes from another host whose traffic passes through
the bond.
On the other hand, the logic of this patch might be useful to
purge table entries for local addresses when they are deconfigured from
the bond. I don't think it's a complete solution by itself, though.
-J
>Signed-off-by: Weiping Pan <panweiping3@gmail.com>
>---
> drivers/net/bonding/bond_alb.c | 35 +++++++++++++++++++++++++++++++++++
> drivers/net/bonding/bond_alb.h | 2 ++
> drivers/net/bonding/bond_main.c | 1 +
> 3 files changed, 38 insertions(+), 0 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index f820b26..928c636 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -853,6 +853,41 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> _unlock_rx_hashtbl_bh(bond);
> }
>
>+/* delete all rlb entries which has ip as ip_src */
>+void bond_alb_delete_entry(struct bonding *bond, __be32 ip)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 curr_index;
>+
>+ _lock_rx_hashtbl_bh(bond);
>+
>+ curr_index = bond_info->rx_hashtbl_head;
>+ while (curr_index != RLB_NULL_INDEX) {
>+ struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
>+ u32 next_index = bond_info->rx_hashtbl[curr_index].next;
>+ u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
>+ if (curr->assigned && (curr->ip_src == ip)) {
>+ if (curr_index == bond_info->rx_hashtbl_head) {
>+ bond_info->rx_hashtbl_head = next_index;
>+ }
>+
>+ if (prev_index != RLB_NULL_INDEX) {
>+ bond_info->rx_hashtbl[prev_index].next = next_index;
>+ }
>+
>+ if (next_index != RLB_NULL_INDEX) {
>+ bond_info->rx_hashtbl[next_index].prev = prev_index;
>+ }
>+
>+ rlb_init_table_entry(curr);
>+ }
>+
>+ curr_index = next_index;
>+ }
>+
>+ _unlock_rx_hashtbl_bh(bond);
>+}
>+
> /*********************** tlb/rlb shared functions *********************/
>
> static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
>diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>index 90f140a..38863fc 100644
>--- a/drivers/net/bonding/bond_alb.h
>+++ b/drivers/net/bonding/bond_alb.h
>@@ -163,5 +163,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
> void bond_alb_monitor(struct work_struct *);
> int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
> void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
>+
>+void bond_alb_delete_entry(struct bonding *bond, __be32 ip);
> #endif /* __BOND_ALB_H__ */
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 435984a..ec071b9 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -3315,6 +3315,7 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event,
> return NOTIFY_OK;
> case NETDEV_DOWN:
> bond->master_ip = 0;
>+ bond_alb_delete_entry(bond, ifa->ifa_local);
> return NOTIFY_OK;
> default:
> return NOTIFY_DONE;
>--
>1.7.4.4
>
---
-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-03-01 2:12 ` Jay Vosburgh
2012-03-01 3:58 ` WeipingPan
@ 2012-03-07 16:02 ` Jiri Bohac
2012-04-20 18:56 ` Jiri Bohac
1 sibling, 1 reply; 17+ messages in thread
From: Jiri Bohac @ 2012-03-07 16:02 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: Jiri Bohac, Andy Gospodarek, netdev
On Wed, Feb 29, 2012 at 06:12:20PM -0800, Jay Vosburgh wrote:
> I've done some initial testing with this, and so far I'm seeing
> one problem: every time the local host (with bonding) sends a broadcast
> ARP, that ends up flushing the entire RLB table. Well, all entries that
> match the IP on the bond that's sending an ARP request, which is just
> one address in my testing.
>
> Anyway, this happens because the switch forwards the broadcast
> ARP back around to one of the other bond slaves, and then that
> "incoming" ARP bears an ip_src of our already-in-use IP address, and
> that matches everything in the table.
Good catch! I did not notice this.
> Perhaps a check that the ip_src being flushed is not actually in
> use locally is warranted?
This would not work for the setups where the bonding master is
bridget to some other network. I think it would be better to also
store the source (server) MAC address in struct client_info and
only flush the hash table entries if the MAC address from the
incoming APR packet and the source MAC address stored in the hash
table differ.
Updated patch follows (compile-tested only) I also
fixed the coding style problems you pointed out. As for the
forward/reverse naming, it's your call. Should I change it to
src/dst?
Signed-off-by: Jiri Bohac <jbohac@suse.cz>
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index f820b26..a938ab6 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -84,6 +84,9 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
+static void rlb_delete_table_entry_reverse(struct bonding *bond, u32 index);
+static void rlb_set_reverse_entry(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -366,6 +369,17 @@ static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
return;
}
+ /* We received an ARP from arp->ip_src.
+ * We might have used this IP address previously (on the bonding host
+ * itself or on a system that is bridged together with the bond).
+ * However, if arp->mac_src is different than what is stored in
+ * rx_hashtbl, some other host is now using the IP and we must prevent
+ * sending out client updates with this IP address as the source.
+ * Clean up all hash table entries that have this address as ip_src but
+ * have a dirrerent mac_src.
+ */
+ rlb_purge_src_ip(bond, arp);
+
if (arp->op_code == htons(ARPOP_REPLY)) {
/* update rx hash table for this ARP */
rlb_update_entry_from_arp(bond, arp);
@@ -635,6 +649,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
/* update mac address from arp */
memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
}
+ memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
assigned_slave = client_info->slave;
if (assigned_slave) {
@@ -657,6 +672,13 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
assigned_slave = rlb_next_rx_slave(bond);
if (assigned_slave) {
+ if (!(client_info->assigned && client_info->ip_src == arp->ip_src)) {
+ /* ip_src is going to be updated, fix the reverse hashing */
+ u32 hash_src = _simple_hash((u8 *)&arp->ip_src, sizeof(arp->ip_src));
+ rlb_delete_table_entry_reverse(bond, hash_index);
+ rlb_set_reverse_entry(bond, hash_src, hash_index);
+ }
+
client_info->ip_src = arp->ip_src;
client_info->ip_dst = arp->ip_dst;
/* arp->mac_dst is broadcast for arp reqeusts.
@@ -664,6 +686,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
* upon receiving an arp reply.
*/
memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
+ memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
client_info->slave = assigned_slave;
if (compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) {
@@ -769,11 +792,109 @@ static void rlb_rebalance(struct bonding *bond)
}
/* Caller must hold rx_hashtbl lock */
-static void rlb_init_table_entry(struct rlb_client_info *entry)
+static void rlb_init_table_entry_forward(struct rlb_client_info *entry)
{
- memset(entry, 0, sizeof(struct rlb_client_info));
entry->next = RLB_NULL_INDEX;
entry->prev = RLB_NULL_INDEX;
+ entry->assigned = 0;
+ entry->slave = NULL;
+ entry->tag = 0;
+}
+static void rlb_init_table_entry_reverse(struct rlb_client_info *entry)
+{
+ entry->reverse_first = RLB_NULL_INDEX;
+ entry->reverse_prev = RLB_NULL_INDEX;
+ entry->reverse_next = RLB_NULL_INDEX;
+}
+
+static void rlb_init_table_entry(struct rlb_client_info *entry)
+{
+ memset(entry, 0, sizeof(struct rlb_client_info));
+ rlb_init_table_entry_forward(entry);
+ rlb_init_table_entry_reverse(entry);
+}
+
+static void rlb_delete_table_entry_forward(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx_hashtbl[index].next;
+ u32 prev_index = bond_info->rx_hashtbl[index].prev;
+
+ if (index == bond_info->rx_hashtbl_head)
+ bond_info->rx_hashtbl_head = next_index;
+ if (prev_index != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[prev_index].next = next_index;
+ if (next_index != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next_index].prev = prev_index;
+}
+
+static void rlb_delete_table_entry_reverse(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx_hashtbl[index].reverse_next;
+ u32 prev_index = bond_info->rx_hashtbl[index].reverse_prev;
+
+ bond_info->rx_hashtbl[index].reverse_next = RLB_NULL_INDEX;
+ bond_info->rx_hashtbl[index].reverse_prev = RLB_NULL_INDEX;
+
+ if (next_index != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next_index].reverse_prev = prev_index;
+
+ if (prev_index == RLB_NULL_INDEX)
+ return;
+
+ /* is prev_index pointing to the head of this chain? */
+ if (bond_info->rx_hashtbl[prev_index].reverse_first == index)
+ bond_info->rx_hashtbl[prev_index].reverse_first = next_index;
+ else
+ bond_info->rx_hashtbl[prev_index].reverse_next = next_index;
+
+}
+
+static void rlb_delete_table_entry(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
+
+ rlb_delete_table_entry_forward(bond, index);
+ rlb_init_table_entry_forward(entry);
+
+ rlb_delete_table_entry_reverse(bond, index);
+}
+
+static void rlb_set_reverse_entry(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next;
+
+ bond_info->rx_hashtbl[ip_dst_hash].reverse_prev = ip_src_hash;
+ next = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
+ bond_info->rx_hashtbl[ip_dst_hash].reverse_next = next;
+ if (next != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next].reverse_prev = ip_dst_hash;
+ bond_info->rx_hashtbl[ip_src_hash].reverse_first = ip_dst_hash;
+}
+
+/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does
+ * not match arp->mac_src */
+static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 ip_src_hash = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
+ u32 index;
+
+ _lock_rx_hashtbl_bh(bond);
+
+ index = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
+ while (index != RLB_NULL_INDEX) {
+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
+ u32 next_index = entry->reverse_next;
+ if (entry->ip_src == arp->ip_src &&
+ compare_ether_addr_64bits(arp->mac_src, entry->mac_src))
+ rlb_delete_table_entry(bond, index);
+ index = next_index;
+ }
+ _unlock_rx_hashtbl_bh(bond);
}
static int rlb_initialize(struct bonding *bond)
@@ -831,21 +952,9 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
while (curr_index != RLB_NULL_INDEX) {
struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
u32 next_index = bond_info->rx_hashtbl[curr_index].next;
- u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
-
- if (curr->tag && (curr->vlan_id == vlan_id)) {
- if (curr_index == bond_info->rx_hashtbl_head) {
- bond_info->rx_hashtbl_head = next_index;
- }
- if (prev_index != RLB_NULL_INDEX) {
- bond_info->rx_hashtbl[prev_index].next = next_index;
- }
- if (next_index != RLB_NULL_INDEX) {
- bond_info->rx_hashtbl[next_index].prev = prev_index;
- }
- rlb_init_table_entry(curr);
- }
+ if (curr->tag && (curr->vlan_id == vlan_id))
+ rlb_delete_table_entry(bond, curr_index);
curr_index = next_index;
}
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index 90f140a..8286df52 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -100,6 +100,7 @@ struct tlb_client_info {
struct rlb_client_info {
__be32 ip_src; /* the server IP address */
__be32 ip_dst; /* the client IP address */
+ u8 mac_src[ETH_ALEN]; /* the server MAC address */
u8 mac_dst[ETH_ALEN]; /* the client MAC address */
u32 next; /* The next Hash table entry index */
u32 prev; /* The previous Hash table entry index */
@@ -108,6 +109,9 @@ struct rlb_client_info {
struct slave *slave; /* the slave assigned to this client */
u8 tag; /* flag - need to tag skb */
unsigned short vlan_id; /* VLAN tag associated with IP address */
+ u32 reverse_next; /* next entry with same hash(ip_src) */
+ u32 reverse_prev; /* prev entry with same hash(ip_src) */
+ u32 reverse_first; /* first entry with hash(ip_src) == this entry's index */
};
struct tlb_slave_info {
--
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-02-27 17:34 [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
` (4 preceding siblings ...)
2012-03-01 2:12 ` Jay Vosburgh
@ 2012-03-23 7:10 ` WeipingPan
2012-03-23 10:33 ` Jiri Bohac
5 siblings, 1 reply; 17+ messages in thread
From: WeipingPan @ 2012-03-23 7:10 UTC (permalink / raw)
To: Jiri Bohac; +Cc: Jay Vosburgh, Andy Gospodarek, netdev
On 02/28/2012 01:34 AM, Jiri Bohac wrote:
> Bonding in balance-alb mode records information from ARP packets
> passing through the bond in a hash table (rx_hashtbl).
>
> At certain situations (e.g. link change of a slave),
> rlb_update_rx_clients() will send out ARP packets to update ARP
> caches of other hosts on the network to achieve RX load balancing.
>
> The problem is that once an IP address is recorded in the hash
> table, it stays there indefinitely [1]. If this IP address is
> migrated to a different host in the network, bonding still sends
> out ARP packets that poison other systems' ARP caches with
> invalid information.
>
> This patch solves this by looking at all incoming ARP packets,
> and checking if the source IP address is one of the source
> addresses stored in the rx_hashtbl. If it is, the corresponding
> hash table entries are removed. Thus, when an IP address is
> migrated, the first ARP broadcast by its new owner will purge the
> offending entries of rx_hashtbl.
>
> (a simpler approach, where bonding would monitor IP address
> changes on the local system does not work for setups like:
> HostA --- NetworkA --- eth0-bond0-br0 --- NetworkB --- hostB
> and an IP address migrating from HostB to HostA)
Hi, Jiri,
Do "NetworkA" and "NetworkB" mean different subnet ?
How to configure bonding and bridge to make HostA communicate with hostB ?
What is the problem for this setup ?
I will appreciate it if you can elaborate the problem.
thanks
Weiping Pan
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-03-23 7:10 ` WeipingPan
@ 2012-03-23 10:33 ` Jiri Bohac
0 siblings, 0 replies; 17+ messages in thread
From: Jiri Bohac @ 2012-03-23 10:33 UTC (permalink / raw)
To: WeipingPan; +Cc: Jiri Bohac, Jay Vosburgh, Andy Gospodarek, netdev
On Fri, Mar 23, 2012 at 03:10:15PM +0800, WeipingPan wrote:
> On 02/28/2012 01:34 AM, Jiri Bohac wrote:
> >This patch solves this by looking at all incoming ARP packets,
> >and checking if the source IP address is one of the source
> >addresses stored in the rx_hashtbl. If it is, the corresponding
> >hash table entries are removed. Thus, when an IP address is
> >migrated, the first ARP broadcast by its new owner will purge the
> >offending entries of rx_hashtbl.
> >
> > (a simpler approach, where bonding would monitor IP address
> > changes on the local system does not work for setups like:
> > HostA --- NetworkA --- eth0-bond0-br0 --- NetworkB --- hostB
> > and an IP address migrating from HostB to HostA)
> Hi, Jiri,
> Do "NetworkA" and "NetworkB" mean different subnet ?
> How to configure bonding and bridge to make HostA communicate with hostB ?
> What is the problem for this setup ?
No, NetworkA and NetworkB are the same subnet, same L2 network.
It may be two ethernet segments that are bridge by the br0
bridge. A more common scenario is that HostB is a virtual machine
that communicates through br0->bond0->ethX with HostA.
In this setup, bond0 can not solve the original bug (stale ARP
information in the rlb hash table) simply by monitoring the
removal of IP addresses on the local host. The IP address that is
about to be migrated from HostB to HostA is _not_ configured on
any of the interfaces of the machine running bond0.
The patch solves the problem by looking at ARP requests coming
from NetworkA and deleting RLB hash table entries that were
created while the ARP's src_ip was still assigned in NetworkB.
--
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-03-07 16:02 ` [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
@ 2012-04-20 18:56 ` Jiri Bohac
2012-04-26 20:18 ` Jay Vosburgh
0 siblings, 1 reply; 17+ messages in thread
From: Jiri Bohac @ 2012-04-20 18:56 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: jbohac, Andy Gospodarek, netdev
I finally got back to this, sorry for such a long delay ...
Start of this thread is here:
http://thread.gmane.org/gmane.linux.network/222233
On Wed, Mar 07, 2012 at 05:02:16PM +0100, Jiri Bohac wrote:
> On Wed, Feb 29, 2012 at 06:12:20PM -0800, Jay Vosburgh wrote:
> > I've done some initial testing with this, and so far I'm seeing
> > one problem: every time the local host (with bonding) sends a broadcast
> > ARP, that ends up flushing the entire RLB table. Well, all entries that
> > match the IP on the bond that's sending an ARP request, which is just
> > one address in my testing.
> >
> > Anyway, this happens because the switch forwards the broadcast
> > ARP back around to one of the other bond slaves, and then that
> > "incoming" ARP bears an ip_src of our already-in-use IP address, and
> > that matches everything in the table.
>
> Good catch! I did not notice this.
>
> > Perhaps a check that the ip_src being flushed is not actually in
> > use locally is warranted?
>
> This would not work for the setups where the bonding master is
> bridget to some other network. I think it would be better to also
> store the source (server) MAC address in struct client_info and
> only flush the hash table entries if the MAC address from the
> incoming APR packet and the source MAC address stored in the hash
> table differ.
>
> Updated patch follows (compile-tested only) I also
> fixed the coding style problems you pointed out. As for the
> forward/reverse naming, it's your call. Should I change it to
> src/dst?
I now thoroughly tested this updated patch in a real setup. It
works as intended - it does not purge the entries when receiving the
looped-back ARP requests. It correctly purges the offending
entries when an ARP packet arrives with a its SRC IP address
matching a SRC IP address in the table and having differrent MAC
address -- exactly the case that would cause the ARP cache
poisoning if not purged from the table.
Jay, would you please consider applying this? Or do you want me
to somehow rename the RLB table forward/reverse elements?
Re-sending the fixed patch with the original description:
------
Bonding in balance-alb mode records information from ARP packets
passing through the bond in a hash table (rx_hashtbl).
At certain situations (e.g. link change of a slave),
rlb_update_rx_clients() will send out ARP packets to update ARP
caches of other hosts on the network to achieve RX load balancing.
The problem is that once an IP address is recorded in the hash
table, it stays there indefinitely. If this IP address is
migrated to a different host in the network, bonding still sends
out ARP packets that poison other systems' ARP caches with
invalid information.
This patch solves this by looking at all incoming ARP packets,
and checking if the source IP address is one of the source
addresses stored in the rx_hashtbl. If it is, the corresponding
hash table entries are removed. Thus, when an IP address is
migrated, the first ARP broadcast by its new owner will purge the
offending entries of rx_hashtbl.
The hash table is hashed by ip_dst. To be able to do the above
check efficiently (not walking the whole hash table), we need a
reverse mapping (by ip_src).
I added three new members in struct rlb_client_info:
rx_hashtbl[x].reverse_first will point to the start of a list of
entries for which hash(ip_src) == x.
The list is linked with reverse_next and reverse_prev.
When an incoming ARP packet arrives at rlb_arp_recv()
rlb_purge_src_ip() can quickly walk only the entries on the
corresponding lists, i.e. the entries that are likely to contain
the offending IP address.
Signed-off-by: Jiri Bohac <jbohac@suse.cz>
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index f820b26..a938ab6 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -84,6 +84,9 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
+static void rlb_delete_table_entry_reverse(struct bonding *bond, u32 index);
+static void rlb_set_reverse_entry(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -366,6 +369,17 @@ static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
return;
}
+ /* We received an ARP from arp->ip_src.
+ * We might have used this IP address previously (on the bonding host
+ * itself or on a system that is bridged together with the bond).
+ * However, if arp->mac_src is different than what is stored in
+ * rx_hashtbl, some other host is now using the IP and we must prevent
+ * sending out client updates with this IP address as the source.
+ * Clean up all hash table entries that have this address as ip_src but
+ * have a dirrerent mac_src.
+ */
+ rlb_purge_src_ip(bond, arp);
+
if (arp->op_code == htons(ARPOP_REPLY)) {
/* update rx hash table for this ARP */
rlb_update_entry_from_arp(bond, arp);
@@ -635,6 +649,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
/* update mac address from arp */
memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
}
+ memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
assigned_slave = client_info->slave;
if (assigned_slave) {
@@ -657,6 +672,13 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
assigned_slave = rlb_next_rx_slave(bond);
if (assigned_slave) {
+ if (!(client_info->assigned && client_info->ip_src == arp->ip_src)) {
+ /* ip_src is going to be updated, fix the reverse hashing */
+ u32 hash_src = _simple_hash((u8 *)&arp->ip_src, sizeof(arp->ip_src));
+ rlb_delete_table_entry_reverse(bond, hash_index);
+ rlb_set_reverse_entry(bond, hash_src, hash_index);
+ }
+
client_info->ip_src = arp->ip_src;
client_info->ip_dst = arp->ip_dst;
/* arp->mac_dst is broadcast for arp reqeusts.
@@ -664,6 +686,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
* upon receiving an arp reply.
*/
memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
+ memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
client_info->slave = assigned_slave;
if (compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) {
@@ -769,11 +792,109 @@ static void rlb_rebalance(struct bonding *bond)
}
/* Caller must hold rx_hashtbl lock */
-static void rlb_init_table_entry(struct rlb_client_info *entry)
+static void rlb_init_table_entry_forward(struct rlb_client_info *entry)
{
- memset(entry, 0, sizeof(struct rlb_client_info));
entry->next = RLB_NULL_INDEX;
entry->prev = RLB_NULL_INDEX;
+ entry->assigned = 0;
+ entry->slave = NULL;
+ entry->tag = 0;
+}
+static void rlb_init_table_entry_reverse(struct rlb_client_info *entry)
+{
+ entry->reverse_first = RLB_NULL_INDEX;
+ entry->reverse_prev = RLB_NULL_INDEX;
+ entry->reverse_next = RLB_NULL_INDEX;
+}
+
+static void rlb_init_table_entry(struct rlb_client_info *entry)
+{
+ memset(entry, 0, sizeof(struct rlb_client_info));
+ rlb_init_table_entry_forward(entry);
+ rlb_init_table_entry_reverse(entry);
+}
+
+static void rlb_delete_table_entry_forward(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx_hashtbl[index].next;
+ u32 prev_index = bond_info->rx_hashtbl[index].prev;
+
+ if (index == bond_info->rx_hashtbl_head)
+ bond_info->rx_hashtbl_head = next_index;
+ if (prev_index != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[prev_index].next = next_index;
+ if (next_index != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next_index].prev = prev_index;
+}
+
+static void rlb_delete_table_entry_reverse(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx_hashtbl[index].reverse_next;
+ u32 prev_index = bond_info->rx_hashtbl[index].reverse_prev;
+
+ bond_info->rx_hashtbl[index].reverse_next = RLB_NULL_INDEX;
+ bond_info->rx_hashtbl[index].reverse_prev = RLB_NULL_INDEX;
+
+ if (next_index != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next_index].reverse_prev = prev_index;
+
+ if (prev_index == RLB_NULL_INDEX)
+ return;
+
+ /* is prev_index pointing to the head of this chain? */
+ if (bond_info->rx_hashtbl[prev_index].reverse_first == index)
+ bond_info->rx_hashtbl[prev_index].reverse_first = next_index;
+ else
+ bond_info->rx_hashtbl[prev_index].reverse_next = next_index;
+
+}
+
+static void rlb_delete_table_entry(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
+
+ rlb_delete_table_entry_forward(bond, index);
+ rlb_init_table_entry_forward(entry);
+
+ rlb_delete_table_entry_reverse(bond, index);
+}
+
+static void rlb_set_reverse_entry(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next;
+
+ bond_info->rx_hashtbl[ip_dst_hash].reverse_prev = ip_src_hash;
+ next = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
+ bond_info->rx_hashtbl[ip_dst_hash].reverse_next = next;
+ if (next != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next].reverse_prev = ip_dst_hash;
+ bond_info->rx_hashtbl[ip_src_hash].reverse_first = ip_dst_hash;
+}
+
+/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does
+ * not match arp->mac_src */
+static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 ip_src_hash = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
+ u32 index;
+
+ _lock_rx_hashtbl_bh(bond);
+
+ index = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
+ while (index != RLB_NULL_INDEX) {
+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
+ u32 next_index = entry->reverse_next;
+ if (entry->ip_src == arp->ip_src &&
+ compare_ether_addr_64bits(arp->mac_src, entry->mac_src))
+ rlb_delete_table_entry(bond, index);
+ index = next_index;
+ }
+ _unlock_rx_hashtbl_bh(bond);
}
static int rlb_initialize(struct bonding *bond)
@@ -831,21 +952,9 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
while (curr_index != RLB_NULL_INDEX) {
struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
u32 next_index = bond_info->rx_hashtbl[curr_index].next;
- u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
-
- if (curr->tag && (curr->vlan_id == vlan_id)) {
- if (curr_index == bond_info->rx_hashtbl_head) {
- bond_info->rx_hashtbl_head = next_index;
- }
- if (prev_index != RLB_NULL_INDEX) {
- bond_info->rx_hashtbl[prev_index].next = next_index;
- }
- if (next_index != RLB_NULL_INDEX) {
- bond_info->rx_hashtbl[next_index].prev = prev_index;
- }
- rlb_init_table_entry(curr);
- }
+ if (curr->tag && (curr->vlan_id == vlan_id))
+ rlb_delete_table_entry(bond, curr_index);
curr_index = next_index;
}
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index 90f140a..8286df52 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -100,6 +100,7 @@ struct tlb_client_info {
struct rlb_client_info {
__be32 ip_src; /* the server IP address */
__be32 ip_dst; /* the client IP address */
+ u8 mac_src[ETH_ALEN]; /* the server MAC address */
u8 mac_dst[ETH_ALEN]; /* the client MAC address */
u32 next; /* The next Hash table entry index */
u32 prev; /* The previous Hash table entry index */
@@ -108,6 +109,9 @@ struct rlb_client_info {
struct slave *slave; /* the slave assigned to this client */
u8 tag; /* flag - need to tag skb */
unsigned short vlan_id; /* VLAN tag associated with IP address */
+ u32 reverse_next; /* next entry with same hash(ip_src) */
+ u32 reverse_prev; /* prev entry with same hash(ip_src) */
+ u32 reverse_first; /* first entry with hash(ip_src) == this entry's index */
};
struct tlb_slave_info {
--
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-04-20 18:56 ` Jiri Bohac
@ 2012-04-26 20:18 ` Jay Vosburgh
2012-04-27 21:03 ` Jiri Bohac
0 siblings, 1 reply; 17+ messages in thread
From: Jay Vosburgh @ 2012-04-26 20:18 UTC (permalink / raw)
To: Jiri Bohac; +Cc: Andy Gospodarek, netdev
Jiri Bohac <jbohac@suse.cz> wrote:
>I finally got back to this, sorry for such a long delay ...
>Start of this thread is here:
>http://thread.gmane.org/gmane.linux.network/222233
>
>On Wed, Mar 07, 2012 at 05:02:16PM +0100, Jiri Bohac wrote:
>> On Wed, Feb 29, 2012 at 06:12:20PM -0800, Jay Vosburgh wrote:
>> > I've done some initial testing with this, and so far I'm seeing
>> > one problem: every time the local host (with bonding) sends a broadcast
>> > ARP, that ends up flushing the entire RLB table. Well, all entries that
>> > match the IP on the bond that's sending an ARP request, which is just
>> > one address in my testing.
>> >
>> > Anyway, this happens because the switch forwards the broadcast
>> > ARP back around to one of the other bond slaves, and then that
>> > "incoming" ARP bears an ip_src of our already-in-use IP address, and
>> > that matches everything in the table.
>>
>> Good catch! I did not notice this.
>>
>> > Perhaps a check that the ip_src being flushed is not actually in
>> > use locally is warranted?
>>
>> This would not work for the setups where the bonding master is
>> bridget to some other network. I think it would be better to also
>> store the source (server) MAC address in struct client_info and
>> only flush the hash table entries if the MAC address from the
>> incoming APR packet and the source MAC address stored in the hash
>> table differ.
Just to make sure I understand: the additional check you propose
(beyond a check that the IP source address is not locally in use) is for
the purpose of minimizing unnecessary flushes, by insuring that the
address really has moved. Correct?
>> Updated patch follows (compile-tested only) I also
>> fixed the coding style problems you pointed out. As for the
>> forward/reverse naming, it's your call. Should I change it to
>> src/dst?
>
>I now thoroughly tested this updated patch in a real setup. It
>works as intended - it does not purge the entries when receiving the
>looped-back ARP requests. It correctly purges the offending
>entries when an ARP packet arrives with a its SRC IP address
>matching a SRC IP address in the table and having differrent MAC
>address -- exactly the case that would cause the ARP cache
>poisoning if not purged from the table.
>
>Jay, would you please consider applying this? Or do you want me
>to somehow rename the RLB table forward/reverse elements?
I'm going to give this a spin this afternoon, but just skimming
through it, I'm still not that thrilled about the "forward" and
"reverse" terminology applying to "hash by dst" and "hash by src"; why
not just call 'em "dst_next" and "src_next", et al, and cut out the
middle man?
-J
>Re-sending the fixed patch with the original description:
>
>------
>
>
>Bonding in balance-alb mode records information from ARP packets
>passing through the bond in a hash table (rx_hashtbl).
>
>At certain situations (e.g. link change of a slave),
>rlb_update_rx_clients() will send out ARP packets to update ARP
>caches of other hosts on the network to achieve RX load balancing.
>
>The problem is that once an IP address is recorded in the hash
>table, it stays there indefinitely. If this IP address is
>migrated to a different host in the network, bonding still sends
>out ARP packets that poison other systems' ARP caches with
>invalid information.
>
>This patch solves this by looking at all incoming ARP packets,
>and checking if the source IP address is one of the source
>addresses stored in the rx_hashtbl. If it is, the corresponding
>hash table entries are removed. Thus, when an IP address is
>migrated, the first ARP broadcast by its new owner will purge the
>offending entries of rx_hashtbl.
>
>The hash table is hashed by ip_dst. To be able to do the above
>check efficiently (not walking the whole hash table), we need a
>reverse mapping (by ip_src).
>
>I added three new members in struct rlb_client_info:
> rx_hashtbl[x].reverse_first will point to the start of a list of
> entries for which hash(ip_src) == x.
> The list is linked with reverse_next and reverse_prev.
>
>When an incoming ARP packet arrives at rlb_arp_recv()
>rlb_purge_src_ip() can quickly walk only the entries on the
>corresponding lists, i.e. the entries that are likely to contain
>the offending IP address.
>
>Signed-off-by: Jiri Bohac <jbohac@suse.cz>
>
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index f820b26..a938ab6 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -84,6 +84,9 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
>
> /* Forward declaration */
> static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
>+static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
>+static void rlb_delete_table_entry_reverse(struct bonding *bond, u32 index);
>+static void rlb_set_reverse_entry(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash);
>
> static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
> {
>@@ -366,6 +369,17 @@ static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
> return;
> }
>
>+ /* We received an ARP from arp->ip_src.
>+ * We might have used this IP address previously (on the bonding host
>+ * itself or on a system that is bridged together with the bond).
>+ * However, if arp->mac_src is different than what is stored in
>+ * rx_hashtbl, some other host is now using the IP and we must prevent
>+ * sending out client updates with this IP address as the source.
>+ * Clean up all hash table entries that have this address as ip_src but
>+ * have a dirrerent mac_src.
>+ */
>+ rlb_purge_src_ip(bond, arp);
>+
> if (arp->op_code == htons(ARPOP_REPLY)) {
> /* update rx hash table for this ARP */
> rlb_update_entry_from_arp(bond, arp);
>@@ -635,6 +649,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> /* update mac address from arp */
> memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
> }
>+ memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
>
> assigned_slave = client_info->slave;
> if (assigned_slave) {
>@@ -657,6 +672,13 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> assigned_slave = rlb_next_rx_slave(bond);
>
> if (assigned_slave) {
>+ if (!(client_info->assigned && client_info->ip_src == arp->ip_src)) {
>+ /* ip_src is going to be updated, fix the reverse hashing */
>+ u32 hash_src = _simple_hash((u8 *)&arp->ip_src, sizeof(arp->ip_src));
>+ rlb_delete_table_entry_reverse(bond, hash_index);
>+ rlb_set_reverse_entry(bond, hash_src, hash_index);
>+ }
>+
> client_info->ip_src = arp->ip_src;
> client_info->ip_dst = arp->ip_dst;
> /* arp->mac_dst is broadcast for arp reqeusts.
>@@ -664,6 +686,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> * upon receiving an arp reply.
> */
> memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
>+ memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
> client_info->slave = assigned_slave;
>
> if (compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) {
>@@ -769,11 +792,109 @@ static void rlb_rebalance(struct bonding *bond)
> }
>
> /* Caller must hold rx_hashtbl lock */
>-static void rlb_init_table_entry(struct rlb_client_info *entry)
>+static void rlb_init_table_entry_forward(struct rlb_client_info *entry)
> {
>- memset(entry, 0, sizeof(struct rlb_client_info));
> entry->next = RLB_NULL_INDEX;
> entry->prev = RLB_NULL_INDEX;
>+ entry->assigned = 0;
>+ entry->slave = NULL;
>+ entry->tag = 0;
>+}
>+static void rlb_init_table_entry_reverse(struct rlb_client_info *entry)
>+{
>+ entry->reverse_first = RLB_NULL_INDEX;
>+ entry->reverse_prev = RLB_NULL_INDEX;
>+ entry->reverse_next = RLB_NULL_INDEX;
>+}
>+
>+static void rlb_init_table_entry(struct rlb_client_info *entry)
>+{
>+ memset(entry, 0, sizeof(struct rlb_client_info));
>+ rlb_init_table_entry_forward(entry);
>+ rlb_init_table_entry_reverse(entry);
>+}
>+
>+static void rlb_delete_table_entry_forward(struct bonding *bond, u32 index)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 next_index = bond_info->rx_hashtbl[index].next;
>+ u32 prev_index = bond_info->rx_hashtbl[index].prev;
>+
>+ if (index == bond_info->rx_hashtbl_head)
>+ bond_info->rx_hashtbl_head = next_index;
>+ if (prev_index != RLB_NULL_INDEX)
>+ bond_info->rx_hashtbl[prev_index].next = next_index;
>+ if (next_index != RLB_NULL_INDEX)
>+ bond_info->rx_hashtbl[next_index].prev = prev_index;
>+}
>+
>+static void rlb_delete_table_entry_reverse(struct bonding *bond, u32 index)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 next_index = bond_info->rx_hashtbl[index].reverse_next;
>+ u32 prev_index = bond_info->rx_hashtbl[index].reverse_prev;
>+
>+ bond_info->rx_hashtbl[index].reverse_next = RLB_NULL_INDEX;
>+ bond_info->rx_hashtbl[index].reverse_prev = RLB_NULL_INDEX;
>+
>+ if (next_index != RLB_NULL_INDEX)
>+ bond_info->rx_hashtbl[next_index].reverse_prev = prev_index;
>+
>+ if (prev_index == RLB_NULL_INDEX)
>+ return;
>+
>+ /* is prev_index pointing to the head of this chain? */
>+ if (bond_info->rx_hashtbl[prev_index].reverse_first == index)
>+ bond_info->rx_hashtbl[prev_index].reverse_first = next_index;
>+ else
>+ bond_info->rx_hashtbl[prev_index].reverse_next = next_index;
>+
>+}
>+
>+static void rlb_delete_table_entry(struct bonding *bond, u32 index)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
>+
>+ rlb_delete_table_entry_forward(bond, index);
>+ rlb_init_table_entry_forward(entry);
>+
>+ rlb_delete_table_entry_reverse(bond, index);
>+}
>+
>+static void rlb_set_reverse_entry(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 next;
>+
>+ bond_info->rx_hashtbl[ip_dst_hash].reverse_prev = ip_src_hash;
>+ next = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
>+ bond_info->rx_hashtbl[ip_dst_hash].reverse_next = next;
>+ if (next != RLB_NULL_INDEX)
>+ bond_info->rx_hashtbl[next].reverse_prev = ip_dst_hash;
>+ bond_info->rx_hashtbl[ip_src_hash].reverse_first = ip_dst_hash;
>+}
>+
>+/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does
>+ * not match arp->mac_src */
>+static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
>+{
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+ u32 ip_src_hash = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
>+ u32 index;
>+
>+ _lock_rx_hashtbl_bh(bond);
>+
>+ index = bond_info->rx_hashtbl[ip_src_hash].reverse_first;
>+ while (index != RLB_NULL_INDEX) {
>+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
>+ u32 next_index = entry->reverse_next;
>+ if (entry->ip_src == arp->ip_src &&
>+ compare_ether_addr_64bits(arp->mac_src, entry->mac_src))
>+ rlb_delete_table_entry(bond, index);
>+ index = next_index;
>+ }
>+ _unlock_rx_hashtbl_bh(bond);
> }
>
> static int rlb_initialize(struct bonding *bond)
>@@ -831,21 +952,9 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> while (curr_index != RLB_NULL_INDEX) {
> struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
> u32 next_index = bond_info->rx_hashtbl[curr_index].next;
>- u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
>-
>- if (curr->tag && (curr->vlan_id == vlan_id)) {
>- if (curr_index == bond_info->rx_hashtbl_head) {
>- bond_info->rx_hashtbl_head = next_index;
>- }
>- if (prev_index != RLB_NULL_INDEX) {
>- bond_info->rx_hashtbl[prev_index].next = next_index;
>- }
>- if (next_index != RLB_NULL_INDEX) {
>- bond_info->rx_hashtbl[next_index].prev = prev_index;
>- }
>
>- rlb_init_table_entry(curr);
>- }
>+ if (curr->tag && (curr->vlan_id == vlan_id))
>+ rlb_delete_table_entry(bond, curr_index);
>
> curr_index = next_index;
> }
>diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>index 90f140a..8286df52 100644
>--- a/drivers/net/bonding/bond_alb.h
>+++ b/drivers/net/bonding/bond_alb.h
>@@ -100,6 +100,7 @@ struct tlb_client_info {
> struct rlb_client_info {
> __be32 ip_src; /* the server IP address */
> __be32 ip_dst; /* the client IP address */
>+ u8 mac_src[ETH_ALEN]; /* the server MAC address */
> u8 mac_dst[ETH_ALEN]; /* the client MAC address */
> u32 next; /* The next Hash table entry index */
> u32 prev; /* The previous Hash table entry index */
>@@ -108,6 +109,9 @@ struct rlb_client_info {
> struct slave *slave; /* the slave assigned to this client */
> u8 tag; /* flag - need to tag skb */
> unsigned short vlan_id; /* VLAN tag associated with IP address */
>+ u32 reverse_next; /* next entry with same hash(ip_src) */
>+ u32 reverse_prev; /* prev entry with same hash(ip_src) */
>+ u32 reverse_first; /* first entry with hash(ip_src) == this entry's index */
> };
>
> struct tlb_slave_info {
>
>
>
>--
>Jiri Bohac <jbohac@suse.cz>
>SUSE Labs, SUSE CZ
>
---
-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table
2012-04-26 20:18 ` Jay Vosburgh
@ 2012-04-27 21:03 ` Jiri Bohac
0 siblings, 0 replies; 17+ messages in thread
From: Jiri Bohac @ 2012-04-27 21:03 UTC (permalink / raw)
To: Jay Vosburgh; +Cc: Jiri Bohac, Andy Gospodarek, netdev
On Thu, Apr 26, 2012 at 01:18:22PM -0700, Jay Vosburgh wrote:
> >On Wed, Mar 07, 2012 at 05:02:16PM +0100, Jiri Bohac wrote:
> >> I think it would be better to also
> >> store the source (server) MAC address in struct client_info and
> >> only flush the hash table entries if the MAC address from the
> >> incoming APR packet and the source MAC address stored in the hash
> >> table differ.
>
> Just to make sure I understand: the additional check you propose
> (beyond a check that the IP source address is not locally in use) is for
> the purpose of minimizing unnecessary flushes, by insuring that the
> address really has moved. Correct?
No. There is no check whether the IP source address is local.
The check looks if the IP source address is stored in the rlb
hash table as ip_src. Could be a result of using the IP address
locally, or by other hosts bridged with the bonding master.
And the additional check that prevents unnecessarry flushes
(caused by ARP packets sent out from the bond being looped back
by a switch) is a check for the MAC address in the ARP packet.
If the MAC address is different from the MAC address stored in
the rlb hash table, it means the host with IP address ip_src does
no longer use mac_src and we must not send out client updates
with this ip_src/mac_src combination.
If the MAC address in the ARP packet is equal to the mac_src, it
could mean two things:
- the IP address has not moved and is still used locally or
bridged to the bonding master; we received this packet because
a switch looped it back to another slave of the bond
- the IP address has actually moved, but the MAC address remained
the same (think of a virtual machine migration, keeping the
virtual NIC's MAC address). We don't mind keeping the
corresponding rlb entry, because the ip/mac combination is
still valid and will not polute ARP caches with invalid
information.
> I'm going to give this a spin this afternoon, but just skimming
> through it, I'm still not that thrilled about the "forward" and
> "reverse" terminology applying to "hash by dst" and "hash by src"; why
> not just call 'em "dst_next" and "src_next", et al, and cut out the
> middle man?
How about these naming changes - patch below:
next -> used_next
prev -> used_prev
rx_hashtbl_head -> rx_hashtbl_used_head
the currect linked list is _not_ a list of hash table
entries with colliding ip_dst. It's a list of entries that are
being used; its purpose is to avoid walking the whole hash table
when looking for used entries;
reverse_next -> src_next
reverse_prev -> src_prev
reverse_first -> src_first
I also renamed some of the functions, e.g.
rlb_src_unlink/rlb_src_link instead of
rlb_delete_table_entry_reverse/rlb_set_reverse_entry
because they actually link the existing entries to the
corresponding linked list.
Signed-off-by: Jiri Bohac <jbohac@suse.cz>
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 9abfde4..a7809ae 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -84,6 +84,9 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
+static void rlb_src_unlink(struct bonding *bond, u32 index);
+static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -364,6 +367,17 @@ static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
return;
}
+ /* We received an ARP from arp->ip_src.
+ * We might have used this IP address previously (on the bonding host
+ * itself or on a system that is bridged together with the bond).
+ * However, if arp->mac_src is different than what is stored in
+ * rx_hashtbl, some other host is now using the IP and we must prevent
+ * sending out client updates with this IP address and the old MAC address.
+ * Clean up all hash table entries that have this address as ip_src but
+ * have a dirrerent mac_src.
+ */
+ rlb_purge_src_ip(bond, arp);
+
if (arp->op_code == htons(ARPOP_REPLY)) {
/* update rx hash table for this ARP */
rlb_update_entry_from_arp(bond, arp);
@@ -440,9 +454,9 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
_lock_rx_hashtbl_bh(bond);
rx_hash_table = bond_info->rx_hashtbl;
- index = bond_info->rx_hashtbl_head;
+ index = bond_info->rx_hashtbl_used_head;
for (; index != RLB_NULL_INDEX; index = next_index) {
- next_index = rx_hash_table[index].next;
+ next_index = rx_hash_table[index].used_next;
if (rx_hash_table[index].slave == slave) {
struct slave *assigned_slave = rlb_next_rx_slave(bond);
@@ -527,8 +541,8 @@ static void rlb_update_rx_clients(struct bonding *bond)
_lock_rx_hashtbl_bh(bond);
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
+ hash_index = bond_info->rx_hashtbl_used_head;
+ for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
if (client_info->ntt) {
rlb_update_client(client_info);
@@ -556,8 +570,8 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
_lock_rx_hashtbl_bh(bond);
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
+ hash_index = bond_info->rx_hashtbl_used_head;
+ for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
if ((client_info->slave == slave) &&
@@ -586,8 +600,8 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
_lock_rx_hashtbl(bond);
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
+ hash_index = bond_info->rx_hashtbl_used_head;
+ for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
if (!client_info->slave) {
@@ -633,6 +647,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
/* update mac address from arp */
memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
}
+ memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
assigned_slave = client_info->slave;
if (assigned_slave) {
@@ -655,6 +670,13 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
assigned_slave = rlb_next_rx_slave(bond);
if (assigned_slave) {
+ if (!(client_info->assigned && client_info->ip_src == arp->ip_src)) {
+ /* ip_src is going to be updated, fix the src hash list */
+ u32 hash_src = _simple_hash((u8 *)&arp->ip_src, sizeof(arp->ip_src));
+ rlb_src_unlink(bond, hash_index);
+ rlb_src_link(bond, hash_src, hash_index);
+ }
+
client_info->ip_src = arp->ip_src;
client_info->ip_dst = arp->ip_dst;
/* arp->mac_dst is broadcast for arp reqeusts.
@@ -662,6 +684,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
* upon receiving an arp reply.
*/
memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
+ memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
client_info->slave = assigned_slave;
if (compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) {
@@ -677,11 +700,11 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
}
if (!client_info->assigned) {
- u32 prev_tbl_head = bond_info->rx_hashtbl_head;
- bond_info->rx_hashtbl_head = hash_index;
- client_info->next = prev_tbl_head;
+ u32 prev_tbl_head = bond_info->rx_hashtbl_used_head;
+ bond_info->rx_hashtbl_used_head = hash_index;
+ client_info->used_next = prev_tbl_head;
if (prev_tbl_head != RLB_NULL_INDEX) {
- bond_info->rx_hashtbl[prev_tbl_head].prev =
+ bond_info->rx_hashtbl[prev_tbl_head].used_prev =
hash_index;
}
client_info->assigned = 1;
@@ -748,8 +771,8 @@ static void rlb_rebalance(struct bonding *bond)
_lock_rx_hashtbl_bh(bond);
ntt = 0;
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
+ hash_index = bond_info->rx_hashtbl_used_head;
+ for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
assigned_slave = rlb_next_rx_slave(bond);
if (assigned_slave && (client_info->slave != assigned_slave)) {
@@ -767,11 +790,113 @@ static void rlb_rebalance(struct bonding *bond)
}
/* Caller must hold rx_hashtbl lock */
+static void rlb_init_table_entry_dst(struct rlb_client_info *entry)
+{
+ entry->used_next = RLB_NULL_INDEX;
+ entry->used_prev = RLB_NULL_INDEX;
+ entry->assigned = 0;
+ entry->slave = NULL;
+ entry->tag = 0;
+}
+static void rlb_init_table_entry_src(struct rlb_client_info *entry)
+{
+ entry->src_first = RLB_NULL_INDEX;
+ entry->src_prev = RLB_NULL_INDEX;
+ entry->src_next = RLB_NULL_INDEX;
+}
+
static void rlb_init_table_entry(struct rlb_client_info *entry)
{
memset(entry, 0, sizeof(struct rlb_client_info));
- entry->next = RLB_NULL_INDEX;
- entry->prev = RLB_NULL_INDEX;
+ rlb_init_table_entry_dst(entry);
+ rlb_init_table_entry_src(entry);
+}
+
+static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx_hashtbl[index].used_next;
+ u32 prev_index = bond_info->rx_hashtbl[index].used_prev;
+
+ if (index == bond_info->rx_hashtbl_used_head)
+ bond_info->rx_hashtbl_used_head = next_index;
+ if (prev_index != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[prev_index].used_next = next_index;
+ if (next_index != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next_index].used_prev = prev_index;
+}
+
+/* unlink a rlb hash table entry from the src list */
+static void rlb_src_unlink(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx_hashtbl[index].src_next;
+ u32 prev_index = bond_info->rx_hashtbl[index].src_prev;
+
+ bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX;
+ bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX;
+
+ if (next_index != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next_index].src_prev = prev_index;
+
+ if (prev_index == RLB_NULL_INDEX)
+ return;
+
+ /* is prev_index pointing to the head of this list? */
+ if (bond_info->rx_hashtbl[prev_index].src_first == index)
+ bond_info->rx_hashtbl[prev_index].src_first = next_index;
+ else
+ bond_info->rx_hashtbl[prev_index].src_next = next_index;
+
+}
+
+static void rlb_delete_table_entry(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
+
+ rlb_delete_table_entry_dst(bond, index);
+ rlb_init_table_entry_dst(entry);
+
+ rlb_src_unlink(bond, index);
+}
+
+/* add the rx_hashtbl[ip_dst_hash] entry to the list
+ * of entries with identical ip_src_hash
+ */
+static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next;
+
+ bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash;
+ next = bond_info->rx_hashtbl[ip_src_hash].src_first;
+ bond_info->rx_hashtbl[ip_dst_hash].src_next = next;
+ if (next != RLB_NULL_INDEX)
+ bond_info->rx_hashtbl[next].src_prev = ip_dst_hash;
+ bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash;
+}
+
+/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does
+ * not match arp->mac_src */
+static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 ip_src_hash = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
+ u32 index;
+
+ _lock_rx_hashtbl_bh(bond);
+
+ index = bond_info->rx_hashtbl[ip_src_hash].src_first;
+ while (index != RLB_NULL_INDEX) {
+ struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
+ u32 next_index = entry->src_next;
+ if (entry->ip_src == arp->ip_src &&
+ compare_ether_addr_64bits(arp->mac_src, entry->mac_src))
+ rlb_delete_table_entry(bond, index);
+ index = next_index;
+ }
+ _unlock_rx_hashtbl_bh(bond);
}
static int rlb_initialize(struct bonding *bond)
@@ -789,7 +914,7 @@ static int rlb_initialize(struct bonding *bond)
bond_info->rx_hashtbl = new_hashtbl;
- bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
+ bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) {
rlb_init_table_entry(bond_info->rx_hashtbl + i);
@@ -811,7 +936,7 @@ static void rlb_deinitialize(struct bonding *bond)
kfree(bond_info->rx_hashtbl);
bond_info->rx_hashtbl = NULL;
- bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
+ bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
_unlock_rx_hashtbl_bh(bond);
}
@@ -823,25 +948,13 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
_lock_rx_hashtbl_bh(bond);
- curr_index = bond_info->rx_hashtbl_head;
+ curr_index = bond_info->rx_hashtbl_used_head;
while (curr_index != RLB_NULL_INDEX) {
struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
- u32 next_index = bond_info->rx_hashtbl[curr_index].next;
- u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
-
- if (curr->tag && (curr->vlan_id == vlan_id)) {
- if (curr_index == bond_info->rx_hashtbl_head) {
- bond_info->rx_hashtbl_head = next_index;
- }
- if (prev_index != RLB_NULL_INDEX) {
- bond_info->rx_hashtbl[prev_index].next = next_index;
- }
- if (next_index != RLB_NULL_INDEX) {
- bond_info->rx_hashtbl[next_index].prev = prev_index;
- }
+ u32 next_index = bond_info->rx_hashtbl[curr_index].used_next;
- rlb_init_table_entry(curr);
- }
+ if (curr->tag && (curr->vlan_id == vlan_id))
+ rlb_delete_table_entry(bond, curr_index);
curr_index = next_index;
}
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index 90f140a..1fbc938 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -100,9 +100,18 @@ struct tlb_client_info {
struct rlb_client_info {
__be32 ip_src; /* the server IP address */
__be32 ip_dst; /* the client IP address */
+ u8 mac_src[ETH_ALEN]; /* the server MAC address */
u8 mac_dst[ETH_ALEN]; /* the client MAC address */
- u32 next; /* The next Hash table entry index */
- u32 prev; /* The previous Hash table entry index */
+
+ /* list of used hash table entries, starting at rx_hashtbl_used_head */
+ u32 used_next;
+ u32 used_prev;
+
+ /* ip_src based hashing */
+ u32 src_next; /* next entry with same hash(ip_src) */
+ u32 src_prev; /* prev entry with same hash(ip_src) */
+ u32 src_first; /* first entry with hash(ip_src) == this entry's index */
+
u8 assigned; /* checking whether this entry is assigned */
u8 ntt; /* flag - need to transmit client info */
struct slave *slave; /* the slave assigned to this client */
@@ -131,7 +140,7 @@ struct alb_bond_info {
int rlb_enabled;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
spinlock_t rx_hashtbl_lock;
- u32 rx_hashtbl_head;
+ u32 rx_hashtbl_used_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
*/
diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 3680aa2..a570843 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -31,8 +31,8 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)
spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
- hash_index = bond_info->rx_hashtbl_head;
- for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
+ hash_index = bond_info->rx_hashtbl_used_head;
+ for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
seq_printf(m, "%-15pI4 %-15pI4 %-17pM %s\n",
&client_info->ip_src,
--
Jiri Bohac <jbohac@suse.cz>
SUSE Labs, SUSE CZ
^ permalink raw reply related [flat|nested] 17+ messages in thread
end of thread, other threads:[~2012-04-27 21:03 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-27 17:34 [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
2012-02-27 17:46 ` bonding: should rlb rx_hashtbl be reimplemented? Jiri Bohac
2012-02-29 13:55 ` [PATCH net] bonding:update rlb entry for arp request Weiping Pan
2012-02-29 18:26 ` Jiri Bohac
2012-02-29 13:58 ` [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table WeipingPan
2012-02-29 18:49 ` Jiri Bohac
2012-02-29 18:59 ` Jay Vosburgh
2012-03-01 2:12 ` Jay Vosburgh
2012-03-01 3:58 ` WeipingPan
2012-03-01 4:01 ` [PATCH net] delete rlb entry if ip of bonding is deleted Weiping Pan
2012-03-01 4:19 ` Jay Vosburgh
2012-03-07 16:02 ` [PATCH][RFC] bonding: delete migrated IP addresses from the rlb hash table Jiri Bohac
2012-04-20 18:56 ` Jiri Bohac
2012-04-26 20:18 ` Jay Vosburgh
2012-04-27 21:03 ` Jiri Bohac
2012-03-23 7:10 ` WeipingPan
2012-03-23 10:33 ` Jiri Bohac
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).