From: Jay Vosburgh <jay.vosburgh@canonical.com>
To: sujing <sujing@kylinos.cn>
Cc: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, andy@greyhouse.net, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH] net: bonding: avoid use-after-free with tx_hashtbl/rx_hashtbl
Date: Mon, 27 Mar 2023 21:38:04 -0700 [thread overview]
Message-ID: <23772.1679978284@famine> (raw)
In-Reply-To: <20230328034037.2076930-1-sujing@kylinos.cn>
sujing <sujing@kylinos.cn> wrote:
>In bonding mode 6 (Balance-alb),
>there are some potential race conditions between the 'bond_close' process
>and the tx/rx processes that use tx_hashtbl/rx_hashtbl,
>which may lead to use-after-free.
>
>For instance, when the bond6 device is in the 'bond_close' process
>while some backlogged packets from upper level are transmitted
>to 'bond_start_xmit', there is a spinlock contention between
>'tlb_deinitialize' and 'tlb_choose_channel'.
>
>If 'tlb_deinitialize' preempts the lock before 'tlb_choose_channel',
>a NULL pointer kernel panic will be triggered.
>
>Here's the timeline:
>
>bond_close ------------------ bond_start_xmit
>bond_alb_deinitialize ------- __bond_start_xmit
>tlb_deinitialize ------------ bond_alb_xmit
>spin_lock_bh ---------------- bond_xmit_alb_slave_get
>tx_hashtbl = NULL ----------- tlb_choose_channel
>spin_unlock_bh -------------- //wait for spin_lock_bh
>------------------------------ spin_lock_bh
>------------------------------ __tlb_choose_channel
>causing kernel panic ========> tx_hashtbl[hash_index].tx_slave
>------------------------------ spin_unlock_bh
I'm still thinking on the race here, but have some questions
below about the implementation in the meantime.
>Signed-off-by: sujing <sujing@kylinos.cn>
>---
> drivers/net/bonding/bond_alb.c | 32 +++++++++------------------
> drivers/net/bonding/bond_main.c | 39 +++++++++++++++++++++++++++------
> include/net/bond_alb.h | 5 ++++-
> 3 files changed, 46 insertions(+), 30 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index b9dbad3a8af8..f6ff5ea835c4 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -71,7 +71,7 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
>
> /*********************** tlb specific functions ***************************/
>
>-static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
>+void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
> {
> if (save_load) {
> entry->load_history = 1 + entry->tx_bytes /
>@@ -269,8 +269,8 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
> spin_unlock_bh(&bond->mode_lock);
> }
>
>-static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
>- struct slave *slave)
>+int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
>+ struct slave *slave)
> {
> struct arp_pkt *arp, _arp;
>
>@@ -756,7 +756,7 @@ static void rlb_init_table_entry_src(struct rlb_client_info *entry)
> entry->src_next = RLB_NULL_INDEX;
> }
>
>-static void rlb_init_table_entry(struct rlb_client_info *entry)
>+void rlb_init_table_entry(struct rlb_client_info *entry)
> {
> memset(entry, 0, sizeof(struct rlb_client_info));
> rlb_init_table_entry_dst(entry);
>@@ -874,9 +874,6 @@ static int rlb_initialize(struct bonding *bond)
>
> spin_unlock_bh(&bond->mode_lock);
>
>- /* register to receive ARPs */
>- bond->recv_probe = rlb_arp_recv;
>-
> return 0;
> }
>
>@@ -888,7 +885,6 @@ static void rlb_deinitialize(struct bonding *bond)
>
> kfree(bond_info->rx_hashtbl);
> bond_info->rx_hashtbl = NULL;
>- bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
Why remove this line?
> spin_unlock_bh(&bond->mode_lock);
> }
>@@ -1303,7 +1299,7 @@ static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
>
> /************************ exported alb functions ************************/
>
>-int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>+int bond_alb_initialize(struct bonding *bond)
> {
> int res;
>
>@@ -1311,15 +1307,10 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
> if (res)
> return res;
>
>- if (rlb_enabled) {
>- res = rlb_initialize(bond);
>- if (res) {
>- tlb_deinitialize(bond);
>- return res;
>- }
>- bond->alb_info.rlb_enabled = 1;
>- } else {
>- bond->alb_info.rlb_enabled = 0;
>+ res = rlb_initialize(bond);
>+ if (res) {
>+ tlb_deinitialize(bond);
>+ return res;
> }
>
> return 0;
>@@ -1327,12 +1318,9 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>
> void bond_alb_deinitialize(struct bonding *bond)
> {
>- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>-
> tlb_deinitialize(bond);
>
>- if (bond_info->rlb_enabled)
>- rlb_deinitialize(bond);
>+ rlb_deinitialize(bond);
Why is rlb_deinitialize() now unconditionally called here and in
bond_alb_initialize()? if rlb_enabled is false, why set up / tear down
the RLB hash table that won't be used?
> }
>
> static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 236e5219c811..8fcb5d3ac0a2 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -4217,6 +4217,7 @@ static int bond_open(struct net_device *bond_dev)
> struct bonding *bond = netdev_priv(bond_dev);
> struct list_head *iter;
> struct slave *slave;
>+ int i;
>
> if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN && !bond->rr_tx_counter) {
> bond->rr_tx_counter = alloc_percpu(u32);
>@@ -4239,11 +4240,29 @@ static int bond_open(struct net_device *bond_dev)
> }
>
> if (bond_is_lb(bond)) {
>- /* bond_alb_initialize must be called before the timer
>- * is started.
>- */
>- if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB)))
>- return -ENOMEM;
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+
>+ spin_lock_bh(&bond->mode_lock);
>+
>+ for (i = 0; i < TLB_HASH_TABLE_SIZE; i++)
>+ tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0);
>+
>+ spin_unlock_bh(&bond->mode_lock);
>+
>+ if (BOND_MODE(bond) == BOND_MODE_ALB) {
>+ bond->alb_info.rlb_enabled = 1;
>+ spin_lock_bh(&bond->mode_lock);
>+
>+ bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
>+ for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)
>+ rlb_init_table_entry(bond_info->rx_hashtbl + i);
>+
>+ spin_unlock_bh(&bond->mode_lock);
>+ bond->recv_probe = rlb_arp_recv;
>+ } else {
>+ bond->alb_info.rlb_enabled = 0;
>+ }
>+
Why is all of the above done directly in bond_open() and not in
bond_alb.c somewhere? That would reduce some churn (changing some
functions away from static).
Also, I see that bond_alb_initialize() is now called from
bond_init() instead of bond_open(), and it only calls rlb_initialize().
However, this now duplicates most of the functionality of
rlb_initialize() and tlb_initialize() here. Why?
In general, the described race is TX vs. close processing, so
why is there so much change to the open processing?
-J
> if (bond->params.tlb_dynamic_lb || BOND_MODE(bond) == BOND_MODE_ALB)
> queue_delayed_work(bond->wq, &bond->alb_work, 0);
> }
>@@ -4279,8 +4298,6 @@ static int bond_close(struct net_device *bond_dev)
>
> bond_work_cancel_all(bond);
> bond->send_peer_notif = 0;
>- if (bond_is_lb(bond))
>- bond_alb_deinitialize(bond);
> bond->recv_probe = NULL;
>
> if (bond_uses_primary(bond)) {
>@@ -5854,6 +5871,8 @@ static void bond_uninit(struct net_device *bond_dev)
> struct list_head *iter;
> struct slave *slave;
>
>+ bond_alb_deinitialize(bond);
>+
> bond_netpoll_cleanup(bond_dev);
>
> /* Release the bonded slaves */
>@@ -6295,6 +6314,12 @@ static int bond_init(struct net_device *bond_dev)
> bond_dev->addr_assign_type == NET_ADDR_PERM)
> eth_hw_addr_random(bond_dev);
>
>+ /* bond_alb_initialize must be called before the timer
>+ * is started.
>+ */
>+ if (bond_alb_initialize(bond))
>+ return -ENOMEM;
>+
> return 0;
> }
>
>diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
>index 9dc082b2d543..9fd16e20ef82 100644
>--- a/include/net/bond_alb.h
>+++ b/include/net/bond_alb.h
>@@ -150,7 +150,7 @@ struct alb_bond_info {
> */
> };
>
>-int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
>+int bond_alb_initialize(struct bonding *bond);
> void bond_alb_deinitialize(struct bonding *bond);
> int bond_alb_init_slave(struct bonding *bond, struct slave *slave);
> void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);
>@@ -165,5 +165,8 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
> void bond_alb_monitor(struct work_struct *);
> int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
> void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
>+int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
>+void tlb_init_table_entry(struct tlb_client_info *entry, int save_load);
>+void rlb_init_table_entry(struct rlb_client_info *entry);
> #endif /* _NET_BOND_ALB_H */
>
>--
>2.27.0
>
---
-Jay Vosburgh, jay.vosburgh@canonical.com
next prev parent reply other threads:[~2023-03-28 4:38 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-28 3:40 [PATCH] net: bonding: avoid use-after-free with tx_hashtbl/rx_hashtbl sujing
2023-03-28 4:38 ` Jay Vosburgh [this message]
2023-04-12 3:42 ` sujing
2023-04-12 7:06 ` sujing
2023-03-28 8:14 ` Nikolay Aleksandrov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=23772.1679978284@famine \
--to=jay.vosburgh@canonical.com \
--cc=andy@greyhouse.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sujing@kylinos.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.