From: Jay Vosburgh <jay.vosburgh@canonical.com>
To: sujing <sujing@kylinos.cn>
Cc: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, andy@greyhouse.net, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH] net: bonding: avoid use-after-free with tx_hashtbl/rx_hashtbl
Date: Mon, 27 Mar 2023 21:38:04 -0700 [thread overview]
Message-ID: <23772.1679978284@famine> (raw)
In-Reply-To: <20230328034037.2076930-1-sujing@kylinos.cn>
sujing <sujing@kylinos.cn> wrote:
>In bonding mode 6 (Balance-alb),
>there are some potential race conditions between the 'bond_close' process
>and the tx/rx processes that use tx_hashtbl/rx_hashtbl,
>which may lead to use-after-free.
>
>For instance, when the bond6 device is in the 'bond_close' process
>while some backlogged packets from upper level are transmitted
>to 'bond_start_xmit', there is a spinlock contention between
>'tlb_deinitialize' and 'tlb_choose_channel'.
>
>If 'tlb_deinitialize' preempts the lock before 'tlb_choose_channel',
>a NULL pointer kernel panic will be triggered.
>
>Here's the timeline:
>
>bond_close ------------------ bond_start_xmit
>bond_alb_deinitialize ------- __bond_start_xmit
>tlb_deinitialize ------------ bond_alb_xmit
>spin_lock_bh ---------------- bond_xmit_alb_slave_get
>tx_hashtbl = NULL ----------- tlb_choose_channel
>spin_unlock_bh -------------- //wait for spin_lock_bh
>------------------------------ spin_lock_bh
>------------------------------ __tlb_choose_channel
>causing kernel panic ========> tx_hashtbl[hash_index].tx_slave
>------------------------------ spin_unlock_bh
I'm still thinking on the race here, but have some questions
below about the implementation in the meantime.
>Signed-off-by: sujing <sujing@kylinos.cn>
>---
> drivers/net/bonding/bond_alb.c | 32 +++++++++------------------
> drivers/net/bonding/bond_main.c | 39 +++++++++++++++++++++++++++------
> include/net/bond_alb.h | 5 ++++-
> 3 files changed, 46 insertions(+), 30 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index b9dbad3a8af8..f6ff5ea835c4 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -71,7 +71,7 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
>
> /*********************** tlb specific functions ***************************/
>
>-static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
>+void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
> {
> if (save_load) {
> entry->load_history = 1 + entry->tx_bytes /
>@@ -269,8 +269,8 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
> spin_unlock_bh(&bond->mode_lock);
> }
>
>-static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
>- struct slave *slave)
>+int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
>+ struct slave *slave)
> {
> struct arp_pkt *arp, _arp;
>
>@@ -756,7 +756,7 @@ static void rlb_init_table_entry_src(struct rlb_client_info *entry)
> entry->src_next = RLB_NULL_INDEX;
> }
>
>-static void rlb_init_table_entry(struct rlb_client_info *entry)
>+void rlb_init_table_entry(struct rlb_client_info *entry)
> {
> memset(entry, 0, sizeof(struct rlb_client_info));
> rlb_init_table_entry_dst(entry);
>@@ -874,9 +874,6 @@ static int rlb_initialize(struct bonding *bond)
>
> spin_unlock_bh(&bond->mode_lock);
>
>- /* register to receive ARPs */
>- bond->recv_probe = rlb_arp_recv;
>-
> return 0;
> }
>
>@@ -888,7 +885,6 @@ static void rlb_deinitialize(struct bonding *bond)
>
> kfree(bond_info->rx_hashtbl);
> bond_info->rx_hashtbl = NULL;
>- bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
Why remove this line?
> spin_unlock_bh(&bond->mode_lock);
> }
>@@ -1303,7 +1299,7 @@ static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
>
> /************************ exported alb functions ************************/
>
>-int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>+int bond_alb_initialize(struct bonding *bond)
> {
> int res;
>
>@@ -1311,15 +1307,10 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
> if (res)
> return res;
>
>- if (rlb_enabled) {
>- res = rlb_initialize(bond);
>- if (res) {
>- tlb_deinitialize(bond);
>- return res;
>- }
>- bond->alb_info.rlb_enabled = 1;
>- } else {
>- bond->alb_info.rlb_enabled = 0;
>+ res = rlb_initialize(bond);
>+ if (res) {
>+ tlb_deinitialize(bond);
>+ return res;
> }
>
> return 0;
>@@ -1327,12 +1318,9 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>
> void bond_alb_deinitialize(struct bonding *bond)
> {
>- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>-
> tlb_deinitialize(bond);
>
>- if (bond_info->rlb_enabled)
>- rlb_deinitialize(bond);
>+ rlb_deinitialize(bond);
Why is rlb_deinitialize() now unconditionally called here and in
bond_alb_initialize()? if rlb_enabled is false, why set up / tear down
the RLB hash table that won't be used?
> }
>
> static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 236e5219c811..8fcb5d3ac0a2 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -4217,6 +4217,7 @@ static int bond_open(struct net_device *bond_dev)
> struct bonding *bond = netdev_priv(bond_dev);
> struct list_head *iter;
> struct slave *slave;
>+ int i;
>
> if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN && !bond->rr_tx_counter) {
> bond->rr_tx_counter = alloc_percpu(u32);
>@@ -4239,11 +4240,29 @@ static int bond_open(struct net_device *bond_dev)
> }
>
> if (bond_is_lb(bond)) {
>- /* bond_alb_initialize must be called before the timer
>- * is started.
>- */
>- if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB)))
>- return -ENOMEM;
>+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+
>+ spin_lock_bh(&bond->mode_lock);
>+
>+ for (i = 0; i < TLB_HASH_TABLE_SIZE; i++)
>+ tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0);
>+
>+ spin_unlock_bh(&bond->mode_lock);
>+
>+ if (BOND_MODE(bond) == BOND_MODE_ALB) {
>+ bond->alb_info.rlb_enabled = 1;
>+ spin_lock_bh(&bond->mode_lock);
>+
>+ bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
>+ for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)
>+ rlb_init_table_entry(bond_info->rx_hashtbl + i);
>+
>+ spin_unlock_bh(&bond->mode_lock);
>+ bond->recv_probe = rlb_arp_recv;
>+ } else {
>+ bond->alb_info.rlb_enabled = 0;
>+ }
>+
Why is all of the above done directly in bond_open() and not in
bond_alb.c somewhere? That would reduce some churn (changing some
functions away from static).
Also, I see that bond_alb_initialize() is now called from
bond_init() instead of bond_open(), and it only calls rlb_initialize().
However, this now duplicates most of the functionality of
rlb_initialize() and tlb_initialize() here. Why?
In general, the described race is TX vs. close processing, so
why is there so much change to the open processing?
-J
> if (bond->params.tlb_dynamic_lb || BOND_MODE(bond) == BOND_MODE_ALB)
> queue_delayed_work(bond->wq, &bond->alb_work, 0);
> }
>@@ -4279,8 +4298,6 @@ static int bond_close(struct net_device *bond_dev)
>
> bond_work_cancel_all(bond);
> bond->send_peer_notif = 0;
>- if (bond_is_lb(bond))
>- bond_alb_deinitialize(bond);
> bond->recv_probe = NULL;
>
> if (bond_uses_primary(bond)) {
>@@ -5854,6 +5871,8 @@ static void bond_uninit(struct net_device *bond_dev)
> struct list_head *iter;
> struct slave *slave;
>
>+ bond_alb_deinitialize(bond);
>+
> bond_netpoll_cleanup(bond_dev);
>
> /* Release the bonded slaves */
>@@ -6295,6 +6314,12 @@ static int bond_init(struct net_device *bond_dev)
> bond_dev->addr_assign_type == NET_ADDR_PERM)
> eth_hw_addr_random(bond_dev);
>
>+ /* bond_alb_initialize must be called before the timer
>+ * is started.
>+ */
>+ if (bond_alb_initialize(bond))
>+ return -ENOMEM;
>+
> return 0;
> }
>
>diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
>index 9dc082b2d543..9fd16e20ef82 100644
>--- a/include/net/bond_alb.h
>+++ b/include/net/bond_alb.h
>@@ -150,7 +150,7 @@ struct alb_bond_info {
> */
> };
>
>-int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
>+int bond_alb_initialize(struct bonding *bond);
> void bond_alb_deinitialize(struct bonding *bond);
> int bond_alb_init_slave(struct bonding *bond, struct slave *slave);
> void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);
>@@ -165,5 +165,8 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
> void bond_alb_monitor(struct work_struct *);
> int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
> void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
>+int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
>+void tlb_init_table_entry(struct tlb_client_info *entry, int save_load);
>+void rlb_init_table_entry(struct rlb_client_info *entry);
> #endif /* _NET_BOND_ALB_H */
>
>--
>2.27.0
>
---
-Jay Vosburgh, jay.vosburgh@canonical.com
next prev parent reply other threads:[~2023-03-28 4:38 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-28 3:40 [PATCH] net: bonding: avoid use-after-free with tx_hashtbl/rx_hashtbl sujing
2023-03-28 4:38 ` Jay Vosburgh [this message]
2023-04-12 3:42 ` sujing
2023-04-12 7:06 ` sujing
2023-03-28 8:14 ` Nikolay Aleksandrov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=23772.1679978284@famine \
--to=jay.vosburgh@canonical.com \
--cc=andy@greyhouse.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sujing@kylinos.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).