From: Stanislav Fomichev <sdf.kernel@gmail.com>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com
Subject: [PATCH net-next v6 03/14] net: cache snapshot entries for ndo_set_rx_mode_async
Date: Tue, 7 Apr 2026 08:30:50 -0700 [thread overview]
Message-ID: <20260407153101.3694714-4-sdf@fomichev.me> (raw)
In-Reply-To: <20260407153101.3694714-1-sdf@fomichev.me>
Add a per-device netdev_hw_addr_list cache (rx_mode_addr_cache) that
allows __hw_addr_list_snapshot() and __hw_addr_list_reconcile() to
reuse previously allocated entries instead of hitting GFP_ATOMIC on
every snapshot cycle.
The snapshot path pops entries from the cache when available, falling
back to __hw_addr_create(). The reconcile path splices both snapshot
lists back into the cache via __hw_addr_splice(). The cache is flushed
in free_netdev().
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
(cherry picked from commit ba3ab1832a511f660fdc6231245b14bf610c05bd)
---
include/linux/netdevice.h | 7 ++--
net/core/dev.c | 3 ++
net/core/dev_addr_lists.c | 66 ++++++++++++++++++++++++----------
net/core/dev_addr_lists_test.c | 60 +++++++++++++++++++++----------
4 files changed, 97 insertions(+), 39 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0a41b216cbcf..0c11bfd716a9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1919,6 +1919,7 @@ enum netdev_reg_state {
* does not implement ndo_set_rx_mode()
* @rx_mode_node: List entry for rx_mode work processing
* @rx_mode_tracker: Refcount tracker for rx_mode work
+ * @rx_mode_addr_cache: Recycled snapshot entries for rx_mode work
* @uc: unicast mac addresses
* @mc: multicast mac addresses
* @dev_addrs: list of device hw addresses
@@ -2312,6 +2313,7 @@ struct net_device {
bool uc_promisc;
struct list_head rx_mode_node;
netdevice_tracker rx_mode_tracker;
+ struct netdev_hw_addr_list rx_mode_addr_cache;
#ifdef CONFIG_LOCKDEP
unsigned char nested_level;
#endif
@@ -5016,10 +5018,11 @@ void __hw_addr_init(struct netdev_hw_addr_list *list);
void __hw_addr_flush(struct netdev_hw_addr_list *list);
int __hw_addr_list_snapshot(struct netdev_hw_addr_list *snap,
const struct netdev_hw_addr_list *list,
- int addr_len);
+ int addr_len, struct netdev_hw_addr_list *cache);
void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list,
struct netdev_hw_addr_list *work,
- struct netdev_hw_addr_list *ref, int addr_len);
+ struct netdev_hw_addr_list *ref, int addr_len,
+ struct netdev_hw_addr_list *cache);
/* Functions used for device addresses handling */
void dev_addr_mod(struct net_device *dev, unsigned int offset,
diff --git a/net/core/dev.c b/net/core/dev.c
index fe33feacc4f3..23a832c9facc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -12074,6 +12074,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
mutex_init(&dev->lock);
INIT_LIST_HEAD(&dev->rx_mode_node);
+ __hw_addr_init(&dev->rx_mode_addr_cache);
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -12178,6 +12179,8 @@ void free_netdev(struct net_device *dev)
kfree(rcu_dereference_protected(dev->ingress_queue, 1));
+ __hw_addr_flush(&dev->rx_mode_addr_cache);
+
/* Flush device addresses */
dev_addr_flush(dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 477392127e8a..88e995db15dd 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -511,30 +511,50 @@ void __hw_addr_init(struct netdev_hw_addr_list *list)
}
EXPORT_SYMBOL(__hw_addr_init);
+static void __hw_addr_splice(struct netdev_hw_addr_list *dst,
+ struct netdev_hw_addr_list *src)
+{
+ src->tree = RB_ROOT;
+ list_splice_init(&src->list, &dst->list);
+ dst->count += src->count;
+ src->count = 0;
+}
+
/**
* __hw_addr_list_snapshot - create a snapshot copy of an address list
* @snap: destination snapshot list (needs to be __hw_addr_init-initialized)
* @list: source address list to snapshot
* @addr_len: length of addresses
+ * @cache: entry cache to reuse entries from; falls back to GFP_ATOMIC
*
- * Creates a copy of @list with individually allocated entries suitable
- * for use with __hw_addr_sync_dev() and other list manipulation helpers.
- * Each entry is allocated with GFP_ATOMIC; must be called under a spinlock.
+ * Creates a copy of @list reusing entries from @cache when available.
+ * Must be called under a spinlock.
*
* Return: 0 on success, -errno on failure.
*/
int __hw_addr_list_snapshot(struct netdev_hw_addr_list *snap,
const struct netdev_hw_addr_list *list,
- int addr_len)
+ int addr_len, struct netdev_hw_addr_list *cache)
{
struct netdev_hw_addr *ha, *entry;
list_for_each_entry(ha, &list->list, list) {
- entry = __hw_addr_create(ha->addr, addr_len, ha->type,
- false, false);
- if (!entry) {
- __hw_addr_flush(snap);
- return -ENOMEM;
+ if (cache->count) {
+ entry = list_first_entry(&cache->list,
+ struct netdev_hw_addr, list);
+ list_del(&entry->list);
+ cache->count--;
+ memcpy(entry->addr, ha->addr, addr_len);
+ entry->type = ha->type;
+ entry->global_use = false;
+ entry->synced = 0;
+ } else {
+ entry = __hw_addr_create(ha->addr, addr_len, ha->type,
+ false, false);
+ if (!entry) {
+ __hw_addr_flush(snap);
+ return -ENOMEM;
+ }
}
entry->sync_cnt = ha->sync_cnt;
entry->refcount = ha->refcount;
@@ -554,15 +574,17 @@ EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_snapshot);
* @work: the working snapshot (modified by driver via __hw_addr_sync_dev)
* @ref: the reference snapshot (untouched copy of original state)
* @addr_len: length of addresses
+ * @cache: entry cache to return snapshot entries to for reuse
*
* Walks the reference snapshot and compares each entry against the work
* snapshot to compute sync_cnt deltas. Applies those deltas to @real_list.
- * Frees both snapshots when done.
+ * Returns snapshot entries to @cache for reuse; frees both snapshots.
* Caller must hold netif_addr_lock_bh.
*/
void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list,
struct netdev_hw_addr_list *work,
- struct netdev_hw_addr_list *ref, int addr_len)
+ struct netdev_hw_addr_list *ref, int addr_len,
+ struct netdev_hw_addr_list *cache)
{
struct netdev_hw_addr *ref_ha, *tmp, *work_ha, *real_ha;
int delta;
@@ -611,8 +633,8 @@ void __hw_addr_list_reconcile(struct netdev_hw_addr_list *real_list,
}
}
- __hw_addr_flush(work);
- __hw_addr_flush(ref);
+ __hw_addr_splice(cache, work);
+ __hw_addr_splice(cache, ref);
}
EXPORT_SYMBOL_IF_KUNIT(__hw_addr_list_reconcile);
@@ -1173,14 +1195,18 @@ static int netif_addr_lists_snapshot(struct net_device *dev,
{
int err;
- err = __hw_addr_list_snapshot(uc_snap, &dev->uc, dev->addr_len);
+ err = __hw_addr_list_snapshot(uc_snap, &dev->uc, dev->addr_len,
+ &dev->rx_mode_addr_cache);
if (!err)
- err = __hw_addr_list_snapshot(uc_ref, &dev->uc, dev->addr_len);
+ err = __hw_addr_list_snapshot(uc_ref, &dev->uc, dev->addr_len,
+ &dev->rx_mode_addr_cache);
if (!err)
err = __hw_addr_list_snapshot(mc_snap, &dev->mc,
- dev->addr_len);
+ dev->addr_len,
+ &dev->rx_mode_addr_cache);
if (!err)
- err = __hw_addr_list_snapshot(mc_ref, &dev->mc, dev->addr_len);
+ err = __hw_addr_list_snapshot(mc_ref, &dev->mc, dev->addr_len,
+ &dev->rx_mode_addr_cache);
if (err) {
__hw_addr_flush(uc_snap);
@@ -1197,8 +1223,10 @@ static void netif_addr_lists_reconcile(struct net_device *dev,
struct netdev_hw_addr_list *uc_ref,
struct netdev_hw_addr_list *mc_ref)
{
- __hw_addr_list_reconcile(&dev->uc, uc_snap, uc_ref, dev->addr_len);
- __hw_addr_list_reconcile(&dev->mc, mc_snap, mc_ref, dev->addr_len);
+ __hw_addr_list_reconcile(&dev->uc, uc_snap, uc_ref, dev->addr_len,
+ &dev->rx_mode_addr_cache);
+ __hw_addr_list_reconcile(&dev->mc, mc_snap, mc_ref, dev->addr_len,
+ &dev->rx_mode_addr_cache);
}
static void netif_rx_mode_run(struct net_device *dev)
diff --git a/net/core/dev_addr_lists_test.c b/net/core/dev_addr_lists_test.c
index fba926d5ec0d..260e71a2399f 100644
--- a/net/core/dev_addr_lists_test.c
+++ b/net/core/dev_addr_lists_test.c
@@ -251,8 +251,8 @@ static void dev_addr_test_add_excl(struct kunit *test)
*/
static void dev_addr_test_snapshot_sync(struct kunit *test)
{
+ struct netdev_hw_addr_list snap, ref, cache;
struct net_device *netdev = test->priv;
- struct netdev_hw_addr_list snap, ref;
struct dev_addr_test_priv *datp;
struct netdev_hw_addr *ha;
u8 addr[ETH_ALEN];
@@ -268,10 +268,13 @@ static void dev_addr_test_snapshot_sync(struct kunit *test)
netif_addr_lock_bh(netdev);
__hw_addr_init(&snap);
__hw_addr_init(&ref);
+ __hw_addr_init(&cache);
KUNIT_EXPECT_EQ(test, 0,
- __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+ __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN,
+ &cache));
KUNIT_EXPECT_EQ(test, 0,
- __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+ __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN,
+ &cache));
netif_addr_unlock_bh(netdev);
/* Driver syncs ADDR_A to hardware */
@@ -283,7 +286,8 @@ static void dev_addr_test_snapshot_sync(struct kunit *test)
/* Reconcile: delta=+1 applied to real entry */
netif_addr_lock_bh(netdev);
- __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+ __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN,
+ &cache);
netif_addr_unlock_bh(netdev);
/* Real entry should now reflect the sync: sync_cnt=1, refcount=2 */
@@ -301,6 +305,7 @@ static void dev_addr_test_snapshot_sync(struct kunit *test)
KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
KUNIT_EXPECT_EQ(test, 1, netdev->uc.count);
+ __hw_addr_flush(&cache);
rtnl_unlock();
}
@@ -310,8 +315,8 @@ static void dev_addr_test_snapshot_sync(struct kunit *test)
*/
static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
{
+ struct netdev_hw_addr_list snap, ref, cache;
struct net_device *netdev = test->priv;
- struct netdev_hw_addr_list snap, ref;
struct dev_addr_test_priv *datp;
struct netdev_hw_addr *ha;
u8 addr[ETH_ALEN];
@@ -327,10 +332,13 @@ static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
netif_addr_lock_bh(netdev);
__hw_addr_init(&snap);
__hw_addr_init(&ref);
+ __hw_addr_init(&cache);
KUNIT_EXPECT_EQ(test, 0,
- __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+ __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN,
+ &cache));
KUNIT_EXPECT_EQ(test, 0,
- __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+ __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN,
+ &cache));
netif_addr_unlock_bh(netdev);
/* Driver syncs ADDR_A to hardware */
@@ -349,7 +357,8 @@ static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
* so it gets re-inserted as stale (sync_cnt=1, refcount=1).
*/
netif_addr_lock_bh(netdev);
- __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+ __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN,
+ &cache);
netif_addr_unlock_bh(netdev);
KUNIT_EXPECT_EQ(test, 1, netdev->uc.count);
@@ -366,6 +375,7 @@ static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_unsynced);
KUNIT_EXPECT_EQ(test, 0, netdev->uc.count);
+ __hw_addr_flush(&cache);
rtnl_unlock();
}
@@ -376,8 +386,8 @@ static void dev_addr_test_snapshot_remove_during_sync(struct kunit *test)
*/
static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
{
+ struct netdev_hw_addr_list snap, ref, cache;
struct net_device *netdev = test->priv;
- struct netdev_hw_addr_list snap, ref;
struct dev_addr_test_priv *datp;
struct netdev_hw_addr *ha;
u8 addr[ETH_ALEN];
@@ -403,10 +413,13 @@ static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
netif_addr_lock_bh(netdev);
__hw_addr_init(&snap);
__hw_addr_init(&ref);
+ __hw_addr_init(&cache);
KUNIT_EXPECT_EQ(test, 0,
- __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+ __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN,
+ &cache));
KUNIT_EXPECT_EQ(test, 0,
- __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+ __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN,
+ &cache));
netif_addr_unlock_bh(netdev);
/* Driver unsyncs stale ADDR_A from hardware */
@@ -426,7 +439,8 @@ static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
* applied. Result: sync_cnt=0, refcount=1 (fresh).
*/
netif_addr_lock_bh(netdev);
- __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+ __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN,
+ &cache);
netif_addr_unlock_bh(netdev);
/* Entry survives as fresh: needs re-sync to HW */
@@ -443,6 +457,7 @@ static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
KUNIT_EXPECT_EQ(test, 1 << ADDR_A, datp->addr_synced);
KUNIT_EXPECT_EQ(test, 0, datp->addr_unsynced);
+ __hw_addr_flush(&cache);
rtnl_unlock();
}
@@ -452,8 +467,8 @@ static void dev_addr_test_snapshot_readd_during_unsync(struct kunit *test)
*/
static void dev_addr_test_snapshot_add_and_remove(struct kunit *test)
{
+ struct netdev_hw_addr_list snap, ref, cache;
struct net_device *netdev = test->priv;
- struct netdev_hw_addr_list snap, ref;
struct dev_addr_test_priv *datp;
struct netdev_hw_addr *ha;
u8 addr[ETH_ALEN];
@@ -480,10 +495,13 @@ static void dev_addr_test_snapshot_add_and_remove(struct kunit *test)
netif_addr_lock_bh(netdev);
__hw_addr_init(&snap);
__hw_addr_init(&ref);
+ __hw_addr_init(&cache);
KUNIT_EXPECT_EQ(test, 0,
- __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN));
+ __hw_addr_list_snapshot(&snap, &netdev->uc, ETH_ALEN,
+ &cache));
KUNIT_EXPECT_EQ(test, 0,
- __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN));
+ __hw_addr_list_snapshot(&ref, &netdev->uc, ETH_ALEN,
+ &cache));
netif_addr_unlock_bh(netdev);
/* Driver syncs snapshot: ADDR_C is new -> synced; A,B already synced */
@@ -502,7 +520,8 @@ static void dev_addr_test_snapshot_add_and_remove(struct kunit *test)
* so nothing to apply to ADDR_B.
*/
netif_addr_lock_bh(netdev);
- __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN);
+ __hw_addr_list_reconcile(&netdev->uc, &snap, &ref, ETH_ALEN,
+ &cache);
netif_addr_unlock_bh(netdev);
/* ADDR_A: unchanged (sync_cnt=1, refcount=2)
@@ -536,13 +555,14 @@ static void dev_addr_test_snapshot_add_and_remove(struct kunit *test)
KUNIT_EXPECT_EQ(test, 1 << ADDR_B, datp->addr_unsynced);
KUNIT_EXPECT_EQ(test, 2, netdev->uc.count);
+ __hw_addr_flush(&cache);
rtnl_unlock();
}
static void dev_addr_test_snapshot_benchmark(struct kunit *test)
{
struct net_device *netdev = test->priv;
- struct netdev_hw_addr_list snap;
+ struct netdev_hw_addr_list snap, cache;
u8 addr[ETH_ALEN];
s64 duration = 0;
ktime_t start;
@@ -557,6 +577,8 @@ static void dev_addr_test_snapshot_benchmark(struct kunit *test)
KUNIT_EXPECT_EQ(test, 0, dev_uc_add(netdev, addr));
}
+ __hw_addr_init(&cache);
+
for (iter = 0; iter < 1000; iter++) {
netif_addr_lock_bh(netdev);
__hw_addr_init(&snap);
@@ -564,13 +586,15 @@ static void dev_addr_test_snapshot_benchmark(struct kunit *test)
start = ktime_get();
KUNIT_EXPECT_EQ(test, 0,
__hw_addr_list_snapshot(&snap, &netdev->uc,
- ETH_ALEN));
+ ETH_ALEN, &cache));
duration += ktime_to_ns(ktime_sub(ktime_get(), start));
netif_addr_unlock_bh(netdev);
__hw_addr_flush(&snap);
}
+ __hw_addr_flush(&cache);
+
kunit_info(test,
"1024 addrs x 1000 snapshots: %lld ns total, %lld ns/iter",
duration, div_s64(duration, 1000));
--
2.52.0
next prev parent reply other threads:[~2026-04-07 15:31 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-07 15:30 [PATCH net-next v6 00/14] net: sleepable ndo_set_rx_mode Stanislav Fomichev
2026-04-07 15:30 ` [PATCH net-next v6 01/14] net: add address list snapshot and reconciliation infrastructure Stanislav Fomichev
2026-04-07 15:30 ` [PATCH net-next v6 02/14] net: introduce ndo_set_rx_mode_async and netdev_rx_mode_work Stanislav Fomichev
2026-04-07 15:30 ` Stanislav Fomichev [this message]
2026-04-07 15:30 ` [PATCH net-next v6 04/14] net: move promiscuity handling into netdev_rx_mode_work Stanislav Fomichev
2026-04-07 15:30 ` [PATCH net-next v6 05/14] fbnic: convert to ndo_set_rx_mode_async Stanislav Fomichev
2026-04-07 15:30 ` [PATCH net-next v6 06/14] mlx5: " Stanislav Fomichev
2026-04-07 15:30 ` [PATCH net-next v6 07/14] bnxt: " Stanislav Fomichev
2026-04-07 15:30 ` [PATCH net-next v6 08/14] bnxt: use snapshot in bnxt_cfg_rx_mode Stanislav Fomichev
2026-04-08 5:48 ` Michael Chan
2026-04-07 15:30 ` [PATCH net-next v6 09/14] iavf: convert to ndo_set_rx_mode_async Stanislav Fomichev
2026-04-07 15:30 ` [PATCH net-next v6 10/14] netdevsim: " Stanislav Fomichev
2026-04-07 15:30 ` [PATCH net-next v6 11/14] dummy: " Stanislav Fomichev
2026-04-07 15:30 ` [PATCH net-next v6 12/14] net: warn ops-locked drivers still using ndo_set_rx_mode Stanislav Fomichev
2026-04-07 15:31 ` [PATCH net-next v6 13/14] selftests: net: add team_bridge_macvlan rx_mode test Stanislav Fomichev
2026-04-07 15:31 ` [PATCH net-next v6 14/14] selftests: net: use ip commands instead of teamd in team " Stanislav Fomichev
2026-04-10 3:44 ` [PATCH net-next v6 00/14] net: sleepable ndo_set_rx_mode Jakub Kicinski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260407153101.3694714-4-sdf@fomichev.me \
--to=sdf.kernel@gmail.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox