Netdev List
 help / color / mirror / Atom feed
* [PATCH v3 net-next 14/15] net/sched: mq: no longer acquire qdisc spinlocks in dump operations
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

Prepare mq_dump_common(), mqprio_dump() and mqprio_dump_class_stats()
for RTNL avoidance.

Use private variables instead of assuming sch->bstats and sch->qstats
can be used when folding stats from children.

This means the children qdisc spinlocks no longer need to be acquired.

Add qdisc_qlen_lockless() helper, and change gnet_stats_add_basic()
prototype.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/gen_stats.h   |  9 +++--
 include/net/sch_generic.h | 14 ++++++++
 net/core/gen_estimator.c  | 24 ++++++-------
 net/core/gen_stats.c      | 17 +++++-----
 net/sched/sch_mq.c        | 33 +++++++++++-------
 net/sched/sch_mqprio.c    | 71 +++++++++++++++++++--------------------
 6 files changed, 95 insertions(+), 73 deletions(-)

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 7aa2b8e1fb298c4f994a745b114fc4da785ddf4b..5484b67298e3fe94fe84f0e929799362d21499df 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -21,6 +21,11 @@ struct gnet_stats_basic_sync {
 	struct u64_stats_sync syncp;
 } __aligned(2 * sizeof(u64));
 
+struct gnet_stats {
+	u64	bytes;
+	u64	packets;
+};
+
 struct net_rate_estimator;
 
 struct gnet_dump {
@@ -49,9 +54,9 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
 int gnet_stats_copy_basic(struct gnet_dump *d,
 			  struct gnet_stats_basic_sync __percpu *cpu,
 			  struct gnet_stats_basic_sync *b, bool running);
-void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+void gnet_stats_add_basic(struct gnet_stats *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu,
-			  struct gnet_stats_basic_sync *b, bool running);
+			  const struct gnet_stats_basic_sync *b, bool running);
 int gnet_stats_copy_basic_hw(struct gnet_dump *d,
 			     struct gnet_stats_basic_sync __percpu *cpu,
 			     struct gnet_stats_basic_sync *b, bool running);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b0564a39caf4471619b74179a06a0e41e3765d94..92683be33527bb0a5147d095ba08f5f8494933dd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -542,6 +542,11 @@ static inline int qdisc_qlen(const struct Qdisc *q)
 	return q->q.qlen;
 }
 
+static inline int qdisc_qlen_lockless(const struct Qdisc *q)
+{
+	return READ_ONCE(q->q.qlen);
+}
+
 static inline void qdisc_qlen_inc(struct Qdisc *q)
 {
 	WRITE_ONCE(q->q.qlen, q->q.qlen + 1);
@@ -947,6 +952,15 @@ static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
 	u64_stats_update_end(&bstats->syncp);
 }
 
+static inline void _bstats_set(struct gnet_stats_basic_sync *bstats,
+			       u64 bytes, u64 packets)
+{
+	u64_stats_update_begin(&bstats->syncp);
+	u64_stats_set(&bstats->bytes, bytes);
+	u64_stats_set(&bstats->packets, packets);
+	u64_stats_update_end(&bstats->syncp);
+}
+
 static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
 				 const struct sk_buff *skb)
 {
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index c34e58c6c3e666743e72978f9a78cf7f95a360c3..40990aee45590f2c56c070b0d28f856fc82d1f55 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -60,9 +60,10 @@ struct net_rate_estimator {
 };
 
 static void est_fetch_counters(struct net_rate_estimator *e,
-			       struct gnet_stats_basic_sync *b)
+			       struct gnet_stats *b)
 {
-	gnet_stats_basic_sync_init(b);
+	b->packets = 0;
+	b->bytes = 0;
 	if (e->stats_lock)
 		spin_lock(e->stats_lock);
 
@@ -76,18 +77,15 @@ static void est_fetch_counters(struct net_rate_estimator *e,
 static void est_timer(struct timer_list *t)
 {
 	struct net_rate_estimator *est = timer_container_of(est, t, timer);
-	struct gnet_stats_basic_sync b;
-	u64 b_bytes, b_packets;
+	struct gnet_stats b;
 	u64 rate, brate;
 
 	est_fetch_counters(est, &b);
-	b_bytes = u64_stats_read(&b.bytes);
-	b_packets = u64_stats_read(&b.packets);
 
-	brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
+	brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
 	brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
 
-	rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
+	rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
 	rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
 
 	preempt_disable_nested();
@@ -97,8 +95,8 @@ static void est_timer(struct timer_list *t)
 	write_seqcount_end(&est->seq);
 	preempt_enable_nested();
 
-	est->last_bytes = b_bytes;
-	est->last_packets = b_packets;
+	est->last_bytes = b.bytes;
+	est->last_packets = b.packets;
 
 	est->next_jiffies += ((HZ/4) << est->intvl_log);
 
@@ -138,7 +136,7 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
 {
 	struct gnet_estimator *parm = nla_data(opt);
 	struct net_rate_estimator *old, *est;
-	struct gnet_stats_basic_sync b;
+	struct gnet_stats b;
 	int intvl_log;
 
 	if (nla_len(opt) < sizeof(*parm))
@@ -172,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
 	est_fetch_counters(est, &b);
 	if (lock)
 		local_bh_enable();
-	est->last_bytes = u64_stats_read(&b.bytes);
-	est->last_packets = u64_stats_read(&b.packets);
+	est->last_bytes = b.bytes;
+	est->last_packets = b.packets;
 
 	if (lock)
 		spin_lock_bh(lock);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 1a2380e74272de8eaf3d4ef453e56105a31e9edf..14ee7a4e3709ad5c64a158d3c8d1177ada3a32b0 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -123,10 +123,9 @@ void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
 }
 EXPORT_SYMBOL(gnet_stats_basic_sync_init);
 
-static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
+static void gnet_stats_add_basic_cpu(struct gnet_stats *bstats,
 				     struct gnet_stats_basic_sync __percpu *cpu)
 {
-	u64 t_bytes = 0, t_packets = 0;
 	int i;
 
 	for_each_possible_cpu(i) {
@@ -140,19 +139,18 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
 			packets = u64_stats_read(&bcpu->packets);
 		} while (u64_stats_fetch_retry(&bcpu->syncp, start));
 
-		t_bytes += bytes;
-		t_packets += packets;
+		bstats->bytes += bytes;
+		bstats->packets += packets;
 	}
-	_bstats_update(bstats, t_bytes, t_packets);
 }
 
-void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+void gnet_stats_add_basic(struct gnet_stats *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu,
-			  struct gnet_stats_basic_sync *b, bool running)
+			  const struct gnet_stats_basic_sync *b, bool running)
 {
 	unsigned int start;
-	u64 bytes = 0;
 	u64 packets = 0;
+	u64 bytes = 0;
 
 	WARN_ON_ONCE((cpu || running) && in_hardirq());
 
@@ -167,7 +165,8 @@ void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
 		packets = u64_stats_read(&b->packets);
 	} while (running && u64_stats_fetch_retry(&b->syncp, start));
 
-	_bstats_update(bstats, bytes, packets);
+	bstats->bytes += bytes;
+	bstats->packets += packets;
 }
 EXPORT_SYMBOL(gnet_stats_add_basic);
 
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index ec8c91d3fde04e59daec2aecdb14d6bf50715e15..0d83e69f2f679988d56920c16acb659d2d1ba636 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -143,30 +143,39 @@ EXPORT_SYMBOL_NS_GPL(mq_attach, "NET_SCHED_INTERNAL");
 void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct net_device *dev = qdisc_dev(sch);
+	struct gnet_stats_queue qstats = { 0 };
+	struct gnet_stats bstats = { 0 };
+	const struct Qdisc *qdisc;
 	unsigned int qlen = 0;
-	struct Qdisc *qdisc;
 	unsigned int ntx;
 
-	gnet_stats_basic_sync_init(&sch->bstats);
-	memset(&sch->qstats, 0, sizeof(sch->qstats));
-
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 	 * to account for all, none, or a mix of locked and unlocked child
 	 * qdiscs. Percpu stats are added to counters in-band and locking
 	 * qdisc totals are added at end.
 	 */
+	rcu_read_lock();
 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
-		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
-		spin_lock_bh(qdisc_lock(qdisc));
+		qdisc = rcu_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
 
-		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
-				     &qdisc->bstats, false);
-		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+		gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
+				     &qdisc->bstats, true);
+		gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 				     &qdisc->qstats);
-		qlen += qdisc_qlen(qdisc);
-
-		spin_unlock_bh(qdisc_lock(qdisc));
+		qlen += qdisc_qlen_lockless(qdisc);
 	}
+	rcu_read_unlock();
+
+	spin_lock_bh(qdisc_lock(sch));
+	_bstats_set(&sch->bstats, bstats.bytes, bstats.packets);
+	spin_unlock_bh(qdisc_lock(sch));
+
+	WRITE_ONCE(sch->qstats.qlen, qstats.qlen);
+	WRITE_ONCE(sch->qstats.backlog, qstats.backlog);
+	WRITE_ONCE(sch->qstats.drops, qstats.drops);
+	WRITE_ONCE(sch->qstats.requeues, qstats.requeues);
+	WRITE_ONCE(sch->qstats.overlimits, qstats.overlimits);
+
 	WRITE_ONCE(sch->q.qlen, qlen);
 }
 EXPORT_SYMBOL_NS_GPL(mq_dump_common, "NET_SCHED_INTERNAL");
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 91a92992cd24ab6c30bf7db2288c08cd493c7bc3..0f58b3a3e99a100df929de110fe0bda1a44cc7d6 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -554,32 +554,40 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct net_device *dev = qdisc_dev(sch);
 	struct mqprio_sched *priv = qdisc_priv(sch);
 	struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
+	struct gnet_stats_queue qstats = { 0 };
 	struct tc_mqprio_qopt opt = { 0 };
+	struct gnet_stats bstats = { 0 };
+	const struct Qdisc *qdisc;
 	unsigned int qlen = 0;
-	struct Qdisc *qdisc;
 	unsigned int ntx;
 
-	qlen = 0;
-	gnet_stats_basic_sync_init(&sch->bstats);
-	memset(&sch->qstats, 0, sizeof(sch->qstats));
-
 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 	 * to account for all, none, or a mix of locked and unlocked child
 	 * qdiscs. Percpu stats are added to counters in-band and locking
 	 * qdisc totals are added at end.
 	 */
+	rcu_read_lock();
 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
-		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
-		spin_lock_bh(qdisc_lock(qdisc));
+		qdisc = rcu_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
 
-		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
-				     &qdisc->bstats, false);
-		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+		gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
+				     &qdisc->bstats, true);
+		gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 				     &qdisc->qstats);
-		qlen += qdisc_qlen(qdisc);
-
-		spin_unlock_bh(qdisc_lock(qdisc));
+		qlen += qdisc_qlen_lockless(qdisc);
 	}
+	rcu_read_unlock();
+
+	spin_lock_bh(qdisc_lock(sch));
+	_bstats_set(&sch->bstats, bstats.bytes, bstats.packets);
+	spin_unlock_bh(qdisc_lock(sch));
+
+	WRITE_ONCE(sch->qstats.qlen, qstats.qlen);
+	WRITE_ONCE(sch->qstats.backlog, qstats.backlog);
+	WRITE_ONCE(sch->qstats.drops, qstats.drops);
+	WRITE_ONCE(sch->qstats.requeues, qstats.requeues);
+	WRITE_ONCE(sch->qstats.overlimits, qstats.overlimits);
+
 	WRITE_ONCE(sch->q.qlen, qlen);
 
 	mqprio_qopt_reconstruct(dev, &opt);
@@ -661,45 +669,34 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
 
 static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				   struct gnet_dump *d)
-	__releases(d->lock)
-	__acquires(d->lock)
 {
 	if (cl >= TC_H_MIN_PRIORITY) {
 		struct net_device *dev = qdisc_dev(sch);
 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
-		struct gnet_stats_queue qstats = {0};
+		struct gnet_stats_queue qstats = { 0 };
 		struct gnet_stats_basic_sync bstats;
+		struct gnet_stats _bstats = { 0 };
 		u32 qlen = 0;
 		int i;
 
-		gnet_stats_basic_sync_init(&bstats);
-		/* Drop lock here it will be reclaimed before touching
-		 * statistics this is required because the d->lock we
-		 * hold here is the look on dev_queue->qdisc_sleeping
-		 * also acquired below.
-		 */
-		if (d->lock)
-			spin_unlock_bh(d->lock);
-
+		rcu_read_lock();
 		for (i = tc.offset; i < tc.offset + tc.count; i++) {
-			struct netdev_queue *q = netdev_get_tx_queue(dev, i);
-			struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
-
-			spin_lock_bh(qdisc_lock(qdisc));
+			const struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+			const struct Qdisc *qdisc = rcu_dereference(q->qdisc);
 
-			gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
-					     &qdisc->bstats, false);
+			gnet_stats_add_basic(&_bstats, qdisc->cpu_bstats,
+					     &qdisc->bstats, true);
 			gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 					     &qdisc->qstats);
-			qlen += qdisc_qlen(qdisc);
-
-			spin_unlock_bh(qdisc_lock(qdisc));
+			qlen += qdisc_qlen_lockless(qdisc);
 		}
+		rcu_read_unlock();
+		u64_stats_init(&bstats.syncp);
+		u64_stats_set(&bstats.bytes, _bstats.bytes);
+		u64_stats_set(&bstats.packets, _bstats.packets);
+
 		qlen = qlen + qstats.qlen;
 
-		/* Reclaim root sleeping lock before completing stats */
-		if (d->lock)
-			spin_lock_bh(d->lock);
 		if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
 		    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
 			return -1;
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* Re: [PATCH net] netrom: do some basic forms of validation on incoming frames
From: Dan Cross @ 2026-04-10 18:23 UTC (permalink / raw)
  To: jj
  Cc: Simon Horman, Greg Kroah-Hartman, Jakub Kicinski, netdev,
	linux-kernel, David S. Miller, Eric Dumazet, Paolo Abeni,
	linux-hams, Yizhe Zhuang, stable
In-Reply-To: <18e3df62-34f9-4de0-903b-19919d7ae2ca@eastlink.ca>

On Fri, Apr 10, 2026 at 11:49 AM jj <ve1jot@eastlink.ca> wrote:
> This is NOT an obsolete protocol..this is in use by amateur radio
> operators world-wide...we use it for RF comms usually, because what
> happens if the internet goes "down", we can still provide comms over
> slower RF links....(plus it's a fun mode)please PLEASE do not drop...and
> sorry for the noise...

There are at least three separable issues being conflated here.

One is whether amateur radio operators are using AX.25, NET/ROM, and
ROSE.  They are; that's indisputable.

Another is whether those operators are using the implementation in the
Linux kernel.  Some are (myself included), though many fewer than are
using the protocols generally.

The third is whether preserving the implementation of these in the
kernel is the best mechanism for using those protocols on Linux-based
systems.  For that, I would argue that no, it is not.

Taking just AX.25, the current implementation has known deficiencies:
it is buggy, implements an older version of the protocol, and at best
receives nominal maintenance: notably, the newer networking tools
(`ip`, `ss`, etc) meant as replacements for `netstat`, `route`, and
`ifconfig` have not been updated to incorporate information about the
amateur radio protocols, and recent changes have left them broken for
long stretches of time.  More details are available online, such as at
https://blog.habets.se/2021/11/AX25-user-space.html

There is very little to recommend the kernel implementations, and any
unique functionality they once provided, such as IP over AX.25, can be
done via other means in userspace; e.g., one can use TAP/TUN for IP
over AX.25.

Therefore, it would be better to remove these from the kernel, and
implement them in userspace instead, or use an existing userspace
implementation (e.g., LinBPQ or similar).  Backwards compatibility
with existing Linux applications that expect to use the sockets API
with amateur radio could `LD_PRELOAD` a shim compatibility library
that simulates the current programming interface.  There is simply no
reason to preserve these in the kernel, and bluntly, the
implementation is pure drag at this point.

Note that this doesn't preclude anyone from using AX.25 et al on
Linux, or force dependency on the Internet: it just moves the
implementation of those protocols out of the kernel and into a normal
userspace program, which is arguably easier to maintain and iterate on
for the ham community, anyway.

        - Dan C.
          (KZ2X)

> On 2026-04-10 07:28, Simon Horman wrote:
> > On Fri, Apr 10, 2026 at 07:24:36AM +0200, Greg Kroah-Hartman wrote:
> >> On Thu, Apr 09, 2026 at 08:32:35PM -0700, Jakub Kicinski wrote:
> >>> On Thu, 9 Apr 2026 20:03:28 +0100 Simon Horman wrote:
> >>>> I expect that checking skb->len isn't sufficient here
> >>>> and pskb_may_pull needs to be used to ensure that
> >>>> the data is also available in the linear section of the skb.
> >>> Or for simplicity we could also be testing against skb_headlen()
> >>> since we don't expect any legit non-linear frames here? Dunno.
> > Sure, that's fine by me if it leads to simpler code than
> > using pskb_may_pull(). Else I'd lean towards pskb_may_pull()
> > as it is a more general approach that feels worth proliferating.
> >
> >> I'll be glad to change this either way, your call.  Given that this is
> >> an obsolete protocol that seems to only be a target for drive-by fuzzers
> >> to attack, whatever the simplest thing to do to quiet them up I'll be
> >> glad to implement.
> >>
> >> Or can we just delete this stuff entirely?  :)
> > Deleting sounds good to me.
> > But we likely need a deprecation process.
> > In which case fixing these bugs still makes sense for the short term.
> >
>

^ permalink raw reply

* [PATCH v3 net-next 03/15] net/sched: add READ_ONCE() in gnet_stats_add_queue[_cpu]
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

Stats are read locklessly, add READ_ONCE() to prevent load-tearing.

Write side will be handled in separate patches.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/core/gen_stats.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index b71ccaec0991461333dbe465ee619bca4a06e75b..1a2380e74272de8eaf3d4ef453e56105a31e9edf 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -345,11 +345,11 @@ static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats,
 	for_each_possible_cpu(i) {
 		const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
 
-		qstats->qlen += qcpu->qlen;
-		qstats->backlog += qcpu->backlog;
-		qstats->drops += qcpu->drops;
-		qstats->requeues += qcpu->requeues;
-		qstats->overlimits += qcpu->overlimits;
+		qstats->qlen += READ_ONCE(qcpu->qlen);
+		qstats->backlog += READ_ONCE(qcpu->backlog);
+		qstats->drops += READ_ONCE(qcpu->drops);
+		qstats->requeues += READ_ONCE(qcpu->requeues);
+		qstats->overlimits += READ_ONCE(qcpu->overlimits);
 	}
 }
 
@@ -360,11 +360,11 @@ void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
 	if (cpu) {
 		gnet_stats_add_queue_cpu(qstats, cpu);
 	} else {
-		qstats->qlen += q->qlen;
-		qstats->backlog += q->backlog;
-		qstats->drops += q->drops;
-		qstats->requeues += q->requeues;
-		qstats->overlimits += q->overlimits;
+		qstats->qlen += READ_ONCE(q->qlen);
+		qstats->backlog += READ_ONCE(q->backlog);
+		qstats->drops += READ_ONCE(q->drops);
+		qstats->requeues += READ_ONCE(q->requeues);
+		qstats->overlimits += READ_ONCE(q->overlimits);
 	}
 }
 EXPORT_SYMBOL(gnet_stats_add_queue);
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 09/15] net/sched: sch_pie: annotate data-races in pie_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

pie_dump_stats() only runs with RTNL held,
reading fields that can be changed in qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

Alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.

tc_pie_xstats fields don't need to be latched atomically,
otherwise this bug would have been caught earlier.

Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/pie.h   |  2 +-
 net/sched/sch_pie.c | 38 +++++++++++++++++++-------------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/include/net/pie.h b/include/net/pie.h
index 01cbc66825a40bd21c0a044b1180cbbc346785df..1f3db0c355149b41823a891c9156cac625122031 100644
--- a/include/net/pie.h
+++ b/include/net/pie.h
@@ -104,7 +104,7 @@ static inline void pie_vars_init(struct pie_vars *vars)
 	vars->dq_tstamp = DTIME_INVALID;
 	vars->accu_prob = 0;
 	vars->dq_count = DQCOUNT_INVALID;
-	vars->avg_dq_rate = 0;
+	WRITE_ONCE(vars->avg_dq_rate, 0);
 }
 
 static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 16f3f629cb8e4be71431f7e50a278e3c7fdba8d0..fb53fbf0e328571be72b66ba4e75a938e1963422 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -90,7 +90,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	bool enqueue = false;
 
 	if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
-		q->stats.overlimit++;
+		WRITE_ONCE(q->stats.overlimit, q->stats.overlimit + 1);
 		goto out;
 	}
 
@@ -104,7 +104,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		/* If packet is ecn capable, mark it if drop probability
 		 * is lower than 10%, else drop it.
 		 */
-		q->stats.ecn_mark++;
+		WRITE_ONCE(q->stats.ecn_mark, q->stats.ecn_mark + 1);
 		enqueue = true;
 	}
 
@@ -114,15 +114,15 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		if (!q->params.dq_rate_estimator)
 			pie_set_enqueue_time(skb);
 
-		q->stats.packets_in++;
+		WRITE_ONCE(q->stats.packets_in, q->stats.packets_in + 1);
 		if (qdisc_qlen(sch) > q->stats.maxq)
-			q->stats.maxq = qdisc_qlen(sch);
+			WRITE_ONCE(q->stats.maxq, qdisc_qlen(sch));
 
 		return qdisc_enqueue_tail(skb, sch);
 	}
 
 out:
-	q->stats.dropped++;
+	WRITE_ONCE(q->stats.dropped, q->stats.dropped + 1);
 	q->vars.accu_prob = 0;
 	return qdisc_drop_reason(skb, sch, to_free, reason);
 }
@@ -267,11 +267,11 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
 			count = count / dtime;
 
 			if (vars->avg_dq_rate == 0)
-				vars->avg_dq_rate = count;
+				WRITE_ONCE(vars->avg_dq_rate, count);
 			else
-				vars->avg_dq_rate =
+				WRITE_ONCE(vars->avg_dq_rate,
 				    (vars->avg_dq_rate -
-				     (vars->avg_dq_rate >> 3)) + (count >> 3);
+				     (vars->avg_dq_rate >> 3)) + (count >> 3));
 
 			/* If the queue has receded below the threshold, we hold
 			 * on to the last drain rate calculated, else we reset
@@ -381,7 +381,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
 	if (delta > 0) {
 		/* prevent overflow */
 		if (vars->prob < oldprob) {
-			vars->prob = MAX_PROB;
+			WRITE_ONCE(vars->prob, MAX_PROB);
 			/* Prevent normalization error. If probability is at
 			 * maximum value already, we normalize it here, and
 			 * skip the check to do a non-linear drop in the next
@@ -392,7 +392,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
 	} else {
 		/* prevent underflow */
 		if (vars->prob > oldprob)
-			vars->prob = 0;
+			WRITE_ONCE(vars->prob, 0);
 	}
 
 	/* Non-linear drop in probability: Reduce drop probability quickly if
@@ -403,7 +403,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
 		/* Reduce drop probability to 98.4% */
 		vars->prob -= vars->prob / 64;
 
-	vars->qdelay = qdelay;
+	WRITE_ONCE(vars->qdelay, qdelay);
 	vars->backlog_old = backlog;
 
 	/* We restart the measurement cycle if the following conditions are met
@@ -502,21 +502,21 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	struct pie_sched_data *q = qdisc_priv(sch);
 	struct tc_pie_xstats st = {
 		.prob		= q->vars.prob << BITS_PER_BYTE,
-		.delay		= ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
+		.delay		= ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) /
 				   NSEC_PER_USEC,
-		.packets_in	= q->stats.packets_in,
-		.overlimit	= q->stats.overlimit,
-		.maxq		= q->stats.maxq,
-		.dropped	= q->stats.dropped,
-		.ecn_mark	= q->stats.ecn_mark,
+		.packets_in	= READ_ONCE(q->stats.packets_in),
+		.overlimit	= READ_ONCE(q->stats.overlimit),
+		.maxq		= READ_ONCE(q->stats.maxq),
+		.dropped	= READ_ONCE(q->stats.dropped),
+		.ecn_mark	= READ_ONCE(q->stats.ecn_mark),
 	};
 
 	/* avg_dq_rate is only valid if dq_rate_estimator is enabled */
 	st.dq_rate_estimating = q->params.dq_rate_estimator;
 
 	/* unscale and return dq_rate in bytes per sec */
-	if (q->params.dq_rate_estimator)
-		st.avg_dq_rate = q->vars.avg_dq_rate *
+	if (st.dq_rate_estimating)
+		st.avg_dq_rate = READ_ONCE(q->vars.avg_dq_rate) *
 				 (PSCHED_TICKS_PER_SEC) >> PIE_SCALE;
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 01/15] net/sched: rename qstats_overlimit_inc() to qstats_cpu_overlimit_inc()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

qstats_overlimit_inc() is only used to increment per cpu overlimits.

It can use this_cpu_inc() to avoid this_cpu_ptr() extra cost
and avoid potential store tearing.

Change qstats_overlimit_inc() name and its argument type.

Also add a WRITE_ONCE() in qdisc_qstats_overlimit() to prevent
store tearing.

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux.1
add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-72 (-72)
Function                                     old     new   delta
tcf_skbmod_act                               772     764      -8
tcf_police_act                               733     725      -8
tcf_mirred_to_dev                           1126    1114     -12
tcf_ife_act                                 1077    1061     -16
tcf_mirred_act                              1324    1296     -28
Total: Before=29610901, After=29610829, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/act_api.h     | 2 +-
 include/net/sch_generic.h | 6 +++---
 net/sched/act_ife.c       | 4 ++--
 net/sched/act_police.c    | 2 +-
 net/sched/act_skbmod.c    | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index d11b791079302f50c47e174979767e0b24afc59a..2ec4ef9a5d0c8e9110f92f135cc3c31a38af0479 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -250,7 +250,7 @@ static inline void tcf_action_inc_drop_qstats(struct tc_action *a)
 static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
 {
 	if (likely(a->cpu_qstats)) {
-		qstats_overlimit_inc(this_cpu_ptr(a->cpu_qstats));
+		qstats_cpu_overlimit_inc(a->cpu_qstats);
 		return;
 	}
 	atomic_inc(&a->tcfa_overlimits);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5af262ec4bbd2d5021904df127a849e52c26178a..3ee383c6fc3f66f1aecd9ebc675fbd143852c150 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1004,9 +1004,9 @@ static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
 	qstats->drops++;
 }
 
-static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)
+static inline void qstats_cpu_overlimit_inc(struct gnet_stats_queue __percpu *qstats)
 {
-	qstats->overlimits++;
+	this_cpu_inc(qstats->overlimits);
 }
 
 static inline void qdisc_qstats_drop(struct Qdisc *sch)
@@ -1021,7 +1021,7 @@ static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
 
 static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
 {
-	sch->qstats.overlimits++;
+	WRITE_ONCE(sch->qstats.overlimits, sch->qstats.overlimits + 1);
 }
 
 static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index d5e8a91bb4eb9f1f1f084e199b5ada4e7f7e7205..e1b825e14900d6f46bbfd1b7f72ab6cd554d8a73 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -750,7 +750,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
 			 */
 			pr_info_ratelimited("Unknown metaid %d dlen %d\n",
 					    mtype, dlen);
-			qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
+			qstats_cpu_overlimit_inc(ife->common.cpu_qstats);
 		}
 	}
 
@@ -814,7 +814,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
 		/* abuse overlimits to count when we allow packet
 		 * with no metadata
 		 */
-		qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
+		qstats_cpu_overlimit_inc(ife->common.cpu_qstats);
 		return action;
 	}
 	/* could be stupid policy setup or mtu config
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 12ea9e5a600536b603ea73cc99b4c00381287219..8060f43e4d11c0a26e1475db06b76426f50c5975 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -307,7 +307,7 @@ TC_INDIRECT_SCOPE int tcf_police_act(struct sk_buff *skb,
 	}
 
 inc_overlimits:
-	qstats_overlimit_inc(this_cpu_ptr(police->common.cpu_qstats));
+	qstats_cpu_overlimit_inc(police->common.cpu_qstats);
 inc_drops:
 	if (ret == TC_ACT_SHOT)
 		qstats_drop_inc(this_cpu_ptr(police->common.cpu_qstats));
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 23ca46138f040d38de37684439873921bc9c86af..a464b0a3c1b81dba6c28c1141aa38c5c7cad3acb 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -87,7 +87,7 @@ TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb,
 	return p->action;
 
 drop:
-	qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+	qstats_cpu_overlimit_inc(d->common.cpu_qstats);
 	return TC_ACT_SHOT;
 }
 
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 10/15] net/sched: sch_fq_pie: annotate data-races in fq_pie_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

fq_pie_dump_stats() acquires the qdisc spinlock a bit too late.

Move this acquisition before we fill tc_fq_pie_xstats with live data.

Alternative would be to add READ_ONCE() and WRITE_ONCE() annotations,
but the spinlock is needed anyway.

Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fq_pie.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 197f0df0a6eb06ab4ce25eefe01d32a35dbd84af..72f48fa4010bebbe6be212938b457db21ff3c5a0 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -509,18 +509,19 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
 static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	struct fq_pie_sched_data *q = qdisc_priv(sch);
-	struct tc_fq_pie_xstats st = {
-		.packets_in	= q->stats.packets_in,
-		.overlimit	= q->stats.overlimit,
-		.overmemory	= q->overmemory,
-		.dropped	= q->stats.dropped,
-		.ecn_mark	= q->stats.ecn_mark,
-		.new_flow_count = q->new_flow_count,
-		.memory_usage   = q->memory_usage,
-	};
+	struct tc_fq_pie_xstats st = { 0 };
 	struct list_head *pos;
 
 	sch_tree_lock(sch);
+
+	st.packets_in	= q->stats.packets_in;
+	st.overlimit	= q->stats.overlimit;
+	st.overmemory	= q->overmemory;
+	st.dropped	= q->stats.dropped;
+	st.ecn_mark	= q->stats.ecn_mark;
+	st.new_flow_count = q->new_flow_count;
+	st.memory_usage   = q->memory_usage;
+
 	list_for_each(pos, &q->new_flows)
 		st.new_flows_len++;
 
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 13/15] net/sched: sch_cake: annotate data-races in cake_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet, Toke Høiland-Jørgensen
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

cake_dump_stats() and cake_dump_class_stats() run without qdisc
spinlock being held.

Add READ_ONCE()/WRITE_ONCE() annotations.

Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: "Toke Høiland-Jørgensen" <toke@toke.dk>
---
 net/sched/sch_cake.c | 404 ++++++++++++++++++++++++-------------------
 1 file changed, 225 insertions(+), 179 deletions(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 32e672820c00a88c6d8fe77a6308405e016525ea..f523f0aa4d830e9d3ec4d43bb123e1dc4f8f289d 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -399,14 +399,14 @@ static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust);
  * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
  */
 
-static void cobalt_newton_step(struct cobalt_vars *vars)
+static void cobalt_newton_step(struct cobalt_vars *vars, u32 count)
 {
 	u32 invsqrt, invsqrt2;
 	u64 val;
 
 	invsqrt = vars->rec_inv_sqrt;
 	invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
-	val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+	val = (3LL << 32) - ((u64)count * invsqrt2);
 
 	val >>= 2; /* avoid overflow in following multiply */
 	val = (val * invsqrt) >> (32 - 2 + 1);
@@ -414,12 +414,12 @@ static void cobalt_newton_step(struct cobalt_vars *vars)
 	vars->rec_inv_sqrt = val;
 }
 
-static void cobalt_invsqrt(struct cobalt_vars *vars)
+static void cobalt_invsqrt(struct cobalt_vars *vars, u32 count)
 {
-	if (vars->count < REC_INV_SQRT_CACHE)
-		vars->rec_inv_sqrt = inv_sqrt_cache[vars->count];
+	if (count < REC_INV_SQRT_CACHE)
+		vars->rec_inv_sqrt = inv_sqrt_cache[count];
 	else
-		cobalt_newton_step(vars);
+		cobalt_newton_step(vars, count);
 }
 
 static void cobalt_vars_init(struct cobalt_vars *vars)
@@ -449,16 +449,19 @@ static bool cobalt_queue_full(struct cobalt_vars *vars,
 	bool up = false;
 
 	if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
-		up = !vars->p_drop;
-		vars->p_drop += p->p_inc;
-		if (vars->p_drop < p->p_inc)
-			vars->p_drop = ~0;
-		vars->blue_timer = now;
-	}
-	vars->dropping = true;
-	vars->drop_next = now;
+		u32 p_drop = vars->p_drop;
+
+		up = !p_drop;
+		p_drop += p->p_inc;
+		if (p_drop < p->p_inc)
+			p_drop = ~0;
+		WRITE_ONCE(vars->p_drop, p_drop);
+		WRITE_ONCE(vars->blue_timer, now);
+	}
+	WRITE_ONCE(vars->dropping, true);
+	WRITE_ONCE(vars->drop_next, now);
 	if (!vars->count)
-		vars->count = 1;
+		WRITE_ONCE(vars->count, 1);
 
 	return up;
 }
@@ -474,21 +477,25 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars,
 
 	if (vars->p_drop &&
 	    ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
-		if (vars->p_drop < p->p_dec)
-			vars->p_drop = 0;
+		u32 p_drop = vars->p_drop;
+
+		if (p_drop < p->p_dec)
+			p_drop = 0;
 		else
-			vars->p_drop -= p->p_dec;
-		vars->blue_timer = now;
-		down = !vars->p_drop;
+			p_drop -= p->p_dec;
+		WRITE_ONCE(vars->p_drop, p_drop);
+		WRITE_ONCE(vars->blue_timer, now);
+		down = !p_drop;
 	}
-	vars->dropping = false;
+	WRITE_ONCE(vars->dropping, false);
 
 	if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
-		vars->count--;
-		cobalt_invsqrt(vars);
-		vars->drop_next = cobalt_control(vars->drop_next,
-						 p->interval,
-						 vars->rec_inv_sqrt);
+		WRITE_ONCE(vars->count, vars->count - 1);
+		cobalt_invsqrt(vars, vars->count);
+		WRITE_ONCE(vars->drop_next,
+			   cobalt_control(vars->drop_next,
+					  p->interval,
+					  vars->rec_inv_sqrt));
 	}
 
 	return down;
@@ -507,6 +514,7 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
 	bool next_due, over_target;
 	ktime_t schedule;
 	u64 sojourn;
+	u32 count;
 
 /* The 'schedule' variable records, in its sign, whether 'now' is before or
  * after 'drop_next'.  This allows 'drop_next' to be updated before the next
@@ -528,45 +536,50 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
 	over_target = sojourn > p->target &&
 		      sojourn > p->mtu_time * bulk_flows * 2 &&
 		      sojourn > p->mtu_time * 4;
-	next_due = vars->count && ktime_to_ns(schedule) >= 0;
+	count = vars->count;
+	next_due = count && ktime_to_ns(schedule) >= 0;
 
 	vars->ecn_marked = false;
 
 	if (over_target) {
 		if (!vars->dropping) {
-			vars->dropping = true;
-			vars->drop_next = cobalt_control(now,
-							 p->interval,
-							 vars->rec_inv_sqrt);
+			WRITE_ONCE(vars->dropping, true);
+			WRITE_ONCE(vars->drop_next,
+				   cobalt_control(now,
+						  p->interval,
+						  vars->rec_inv_sqrt));
 		}
-		if (!vars->count)
-			vars->count = 1;
+		if (!count)
+			count = 1;
 	} else if (vars->dropping) {
-		vars->dropping = false;
+		WRITE_ONCE(vars->dropping, false);
 	}
 
 	if (next_due && vars->dropping) {
 		/* Use ECN mark if possible, otherwise drop */
-		if (!(vars->ecn_marked = INET_ECN_set_ce(skb)))
+		vars->ecn_marked = INET_ECN_set_ce(skb);
+		if (!vars->ecn_marked)
 			reason = QDISC_DROP_CONGESTED;
 
-		vars->count++;
-		if (!vars->count)
-			vars->count--;
-		cobalt_invsqrt(vars);
-		vars->drop_next = cobalt_control(vars->drop_next,
-						 p->interval,
-						 vars->rec_inv_sqrt);
+		count++;
+		if (!count)
+			count--;
+		cobalt_invsqrt(vars, count);
+		WRITE_ONCE(vars->drop_next,
+			   cobalt_control(vars->drop_next,
+					  p->interval,
+					  vars->rec_inv_sqrt));
 		schedule = ktime_sub(now, vars->drop_next);
 	} else {
 		while (next_due) {
-			vars->count--;
-			cobalt_invsqrt(vars);
-			vars->drop_next = cobalt_control(vars->drop_next,
-							 p->interval,
-							 vars->rec_inv_sqrt);
+			count--;
+			cobalt_invsqrt(vars, count);
+			WRITE_ONCE(vars->drop_next,
+				   cobalt_control(vars->drop_next,
+						  p->interval,
+						  vars->rec_inv_sqrt));
 			schedule = ktime_sub(now, vars->drop_next);
-			next_due = vars->count && ktime_to_ns(schedule) >= 0;
+			next_due = count && ktime_to_ns(schedule) >= 0;
 		}
 	}
 
@@ -575,11 +588,12 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
 	    get_random_u32() < vars->p_drop)
 		reason = QDISC_DROP_FLOOD_PROTECTION;
 
+	WRITE_ONCE(vars->count, count);
 	/* Overload the drop_next field as an activity timeout */
-	if (!vars->count)
-		vars->drop_next = ktime_add_ns(now, p->interval);
+	if (!count)
+		WRITE_ONCE(vars->drop_next, ktime_add_ns(now, p->interval));
 	else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC)
-		vars->drop_next = now;
+		WRITE_ONCE(vars->drop_next, now);
 
 	return reason;
 }
@@ -813,7 +827,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		     i++, k = (k + 1) % CAKE_SET_WAYS) {
 			if (q->tags[outer_hash + k] == flow_hash) {
 				if (i)
-					q->way_hits++;
+					WRITE_ONCE(q->way_hits, q->way_hits + 1);
 
 				if (!q->flows[outer_hash + k].set) {
 					/* need to increment host refcnts */
@@ -831,7 +845,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		for (i = 0; i < CAKE_SET_WAYS;
 			 i++, k = (k + 1) % CAKE_SET_WAYS) {
 			if (!q->flows[outer_hash + k].set) {
-				q->way_misses++;
+				WRITE_ONCE(q->way_misses, q->way_misses + 1);
 				allocate_src = cake_dsrc(flow_mode);
 				allocate_dst = cake_ddst(flow_mode);
 				goto found;
@@ -841,7 +855,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		/* With no empty queues, default to the original
 		 * queue, accept the collision, update the host tags.
 		 */
-		q->way_collisions++;
+		WRITE_ONCE(q->way_collisions, q->way_collisions + 1);
 		allocate_src = cake_dsrc(flow_mode);
 		allocate_dst = cake_ddst(flow_mode);
 
@@ -875,7 +889,8 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 			q->flows[reduced_hash].srchost = srchost_idx;
 
 			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
-				cake_inc_srchost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode);
+				cake_inc_srchost_bulk_flow_count(q, &q->flows[reduced_hash],
+								 flow_mode);
 		}
 
 		if (allocate_dst) {
@@ -899,7 +914,8 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 			q->flows[reduced_hash].dsthost = dsthost_idx;
 
 			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
-				cake_inc_dsthost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode);
+				cake_inc_dsthost_bulk_flow_count(q, &q->flows[reduced_hash],
+								 flow_mode);
 		}
 	}
 
@@ -1379,9 +1395,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off)
 		len -= off;
 
 	if (qd->max_netlen < len)
-		qd->max_netlen = len;
+		WRITE_ONCE(qd->max_netlen, len);
 	if (qd->min_netlen > len)
-		qd->min_netlen = len;
+		WRITE_ONCE(qd->min_netlen, len);
 
 	len += q->rate_overhead;
 
@@ -1401,9 +1417,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off)
 	}
 
 	if (qd->max_adjlen < len)
-		qd->max_adjlen = len;
+		WRITE_ONCE(qd->max_adjlen, len);
 	if (qd->min_adjlen > len)
-		qd->min_adjlen = len;
+		WRITE_ONCE(qd->min_adjlen, len);
 
 	return len;
 }
@@ -1416,7 +1432,7 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
 	u16 segs = qdisc_pkt_segs(skb);
 	u32 len = qdisc_pkt_len(skb);
 
-	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+	WRITE_ONCE(q->avg_netoff, cake_ewma(q->avg_netoff, off << 16, 8));
 
 	if (segs == 1)
 		return cake_calc_overhead(q, len, off);
@@ -1590,16 +1606,17 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
 	}
 
 	if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
-		b->unresponsive_flow_count++;
+		WRITE_ONCE(b->unresponsive_flow_count,
+			   b->unresponsive_flow_count + 1);
 
 	len = qdisc_pkt_len(skb);
 	q->buffer_used      -= skb->truesize;
-	b->backlogs[idx]    -= len;
-	b->tin_backlog      -= len;
+	WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] - len);
+	WRITE_ONCE(b->tin_backlog, b->tin_backlog - len);
 	qstats_backlog_sub(sch, len);
 
-	flow->dropped++;
-	b->tin_dropped++;
+	WRITE_ONCE(flow->dropped, flow->dropped + 1);
+	WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1);
 
 	if (q->config->rate_flags & CAKE_FLAG_INGRESS)
 		cake_advance_shaper(q, b, skb, now, true);
@@ -1795,7 +1812,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	}
 
 	if (unlikely(len > b->max_skblen))
-		b->max_skblen = len;
+		WRITE_ONCE(b->max_skblen, len);
 
 	if (qdisc_pkt_segs(skb) > 1 && q->config->rate_flags & CAKE_FLAG_SPLIT_GSO) {
 		struct sk_buff *segs, *nskb;
@@ -1819,13 +1836,13 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			numsegs++;
 			slen += segs->len;
 			q->buffer_used += segs->truesize;
-			b->packets++;
 		}
 
 		/* stats */
-		b->bytes	    += slen;
-		b->backlogs[idx]    += slen;
-		b->tin_backlog      += slen;
+		WRITE_ONCE(b->bytes, b->bytes + slen);
+		WRITE_ONCE(b->packets, b->packets + numsegs);
+		WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + slen);
+		WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen);
 		qstats_backlog_add(sch, slen);
 		q->avg_window_bytes += slen;
 
@@ -1843,10 +1860,10 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			ack = cake_ack_filter(q, flow);
 
 		if (ack) {
-			b->ack_drops++;
+			WRITE_ONCE(b->ack_drops, b->ack_drops + 1);
 			qdisc_qstats_drop(sch);
 			ack_pkt_len = qdisc_pkt_len(ack);
-			b->bytes += ack_pkt_len;
+			WRITE_ONCE(b->bytes, b->bytes + ack_pkt_len);
 			q->buffer_used += skb->truesize - ack->truesize;
 			if (q->config->rate_flags & CAKE_FLAG_INGRESS)
 				cake_advance_shaper(q, b, ack, now, true);
@@ -1859,10 +1876,10 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		}
 
 		/* stats */
-		b->packets++;
-		b->bytes	    += len - ack_pkt_len;
-		b->backlogs[idx]    += len - ack_pkt_len;
-		b->tin_backlog      += len - ack_pkt_len;
+		WRITE_ONCE(b->packets, b->packets + 1);
+		WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len);
+		WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + len - ack_pkt_len);
+		WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len);
 		qstats_backlog_add(sch, len - ack_pkt_len);
 		q->avg_window_bytes += len - ack_pkt_len;
 	}
@@ -1894,9 +1911,9 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
 
 			b = div64_u64(b, window_interval);
-			q->avg_peak_bandwidth =
-				cake_ewma(q->avg_peak_bandwidth, b,
-					  b > q->avg_peak_bandwidth ? 2 : 8);
+			WRITE_ONCE(q->avg_peak_bandwidth,
+				   cake_ewma(q->avg_peak_bandwidth, b,
+					     b > q->avg_peak_bandwidth ? 2 : 8));
 			q->avg_window_bytes = 0;
 			q->avg_window_begin = now;
 
@@ -1917,27 +1934,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		if (!flow->set) {
 			list_add_tail(&flow->flowchain, &b->new_flows);
 		} else {
-			b->decaying_flow_count--;
+			WRITE_ONCE(b->decaying_flow_count,
+				   b->decaying_flow_count - 1);
 			list_move_tail(&flow->flowchain, &b->new_flows);
 		}
 		flow->set = CAKE_SET_SPARSE;
-		b->sparse_flow_count++;
+		WRITE_ONCE(b->sparse_flow_count,
+			   b->sparse_flow_count + 1);
 
-		flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode);
+		WRITE_ONCE(flow->deficit,
+			   cake_get_flow_quantum(b, flow, q->config->flow_mode));
 	} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
 		/* this flow was empty, accounted as a sparse flow, but actually
 		 * in the bulk rotation.
 		 */
 		flow->set = CAKE_SET_BULK;
-		b->sparse_flow_count--;
-		b->bulk_flow_count++;
+		WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1);
+		WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count + 1);
 
 		cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
 		cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
 	}
 
 	if (q->buffer_used > q->buffer_max_used)
-		q->buffer_max_used = q->buffer_used;
+		WRITE_ONCE(q->buffer_max_used, q->buffer_used);
 
 	if (q->buffer_used <= q->buffer_limit)
 		return NET_XMIT_SUCCESS;
@@ -1976,8 +1996,8 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
 	if (flow->head) {
 		skb = dequeue_head(flow);
 		len = qdisc_pkt_len(skb);
-		b->backlogs[q->cur_flow] -= len;
-		b->tin_backlog		 -= len;
+		WRITE_ONCE(b->backlogs[q->cur_flow], b->backlogs[q->cur_flow] - len);
+		WRITE_ONCE(b->tin_backlog, b->tin_backlog - len);
 		qstats_backlog_sub(sch, len);
 		q->buffer_used		 -= skb->truesize;
 		qdisc_qlen_dec(sch);
@@ -2042,7 +2062,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 
 		cake_configure_rates(sch, new_rate, true);
 		q->last_checked_active = now;
-		q->active_queues = num_active_qs;
+		WRITE_ONCE(q->active_queues, num_active_qs);
 	}
 
 begin:
@@ -2149,8 +2169,10 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 		 */
 		if (flow->set == CAKE_SET_SPARSE) {
 			if (flow->head) {
-				b->sparse_flow_count--;
-				b->bulk_flow_count++;
+				WRITE_ONCE(b->sparse_flow_count,
+					   b->sparse_flow_count - 1);
+				WRITE_ONCE(b->bulk_flow_count,
+					   b->bulk_flow_count + 1);
 
 				cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
 				cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
@@ -2165,7 +2187,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 			}
 		}
 
-		flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode);
+		WRITE_ONCE(flow->deficit,
+			   flow->deficit + cake_get_flow_quantum(b, flow, q->config->flow_mode));
 		list_move_tail(&flow->flowchain, &b->old_flows);
 
 		goto retry;
@@ -2177,7 +2200,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 		if (!skb) {
 			/* this queue was actually empty */
 			if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
-				b->unresponsive_flow_count--;
+				WRITE_ONCE(b->unresponsive_flow_count,
+					   b->unresponsive_flow_count - 1);
 
 			if (flow->cvars.p_drop || flow->cvars.count ||
 			    ktime_before(now, flow->cvars.drop_next)) {
@@ -2187,16 +2211,22 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 				list_move_tail(&flow->flowchain,
 					       &b->decaying_flows);
 				if (flow->set == CAKE_SET_BULK) {
-					b->bulk_flow_count--;
+					WRITE_ONCE(b->bulk_flow_count,
+						   b->bulk_flow_count - 1);
 
-					cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
-					cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
+					cake_dec_srchost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
+					cake_dec_dsthost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
 
-					b->decaying_flow_count++;
+					WRITE_ONCE(b->decaying_flow_count,
+						   b->decaying_flow_count + 1);
 				} else if (flow->set == CAKE_SET_SPARSE ||
 					   flow->set == CAKE_SET_SPARSE_WAIT) {
-					b->sparse_flow_count--;
-					b->decaying_flow_count++;
+					WRITE_ONCE(b->sparse_flow_count,
+						   b->sparse_flow_count - 1);
+					WRITE_ONCE(b->decaying_flow_count,
+						   b->decaying_flow_count + 1);
 				}
 				flow->set = CAKE_SET_DECAYING;
 			} else {
@@ -2204,14 +2234,20 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 				list_del_init(&flow->flowchain);
 				if (flow->set == CAKE_SET_SPARSE ||
 				    flow->set == CAKE_SET_SPARSE_WAIT)
-					b->sparse_flow_count--;
+					WRITE_ONCE(b->sparse_flow_count,
+						   b->sparse_flow_count - 1);
 				else if (flow->set == CAKE_SET_BULK) {
-					b->bulk_flow_count--;
+					WRITE_ONCE(b->bulk_flow_count,
+						   b->bulk_flow_count - 1);
 
-					cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
-					cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
-				} else
-					b->decaying_flow_count--;
+					cake_dec_srchost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
+					cake_dec_dsthost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
+				} else {
+					WRITE_ONCE(b->decaying_flow_count,
+						   b->decaying_flow_count - 1);
+				}
 
 				flow->set = CAKE_SET_NONE;
 			}
@@ -2230,11 +2266,11 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 		if (q->config->rate_flags & CAKE_FLAG_INGRESS) {
 			len = cake_advance_shaper(q, b, skb,
 						  now, true);
-			flow->deficit -= len;
+			WRITE_ONCE(flow->deficit, flow->deficit - len);
 			b->tin_deficit -= len;
 		}
-		flow->dropped++;
-		b->tin_dropped++;
+		WRITE_ONCE(flow->dropped, flow->dropped + 1);
+		WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1);
 		qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
 		qdisc_qstats_drop(sch);
 		qdisc_dequeue_drop(sch, skb, reason);
@@ -2242,20 +2278,22 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 			goto retry;
 	}
 
-	b->tin_ecn_mark += !!flow->cvars.ecn_marked;
+	WRITE_ONCE(b->tin_ecn_mark, b->tin_ecn_mark + !!flow->cvars.ecn_marked);
 	qdisc_bstats_update(sch, skb);
 	WRITE_ONCE(q->last_active, now);
 
 	/* collect delay stats */
 	delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
-	b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
-	b->peak_delay = cake_ewma(b->peak_delay, delay,
-				  delay > b->peak_delay ? 2 : 8);
-	b->base_delay = cake_ewma(b->base_delay, delay,
-				  delay < b->base_delay ? 2 : 8);
+	WRITE_ONCE(b->avge_delay, cake_ewma(b->avge_delay, delay, 8));
+	WRITE_ONCE(b->peak_delay,
+		   cake_ewma(b->peak_delay, delay,
+			     delay > b->peak_delay ? 2 : 8));
+	WRITE_ONCE(b->base_delay,
+		   cake_ewma(b->base_delay, delay,
+			     delay < b->base_delay ? 2 : 8));
 
 	len = cake_advance_shaper(q, b, skb, now, false);
-	flow->deficit -= len;
+	WRITE_ONCE(flow->deficit, flow->deficit - len);
 	b->tin_deficit -= len;
 
 	if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
@@ -2329,9 +2367,8 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
 	u8  rate_shft = 0;
 	u64 rate_ns = 0;
 
-	b->flow_quantum = 1514;
 	if (rate) {
-		b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
+		WRITE_ONCE(b->flow_quantum, max(min(rate >> 12, 1514ULL), 300ULL));
 		rate_shft = 34;
 		rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
 		rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
@@ -2339,9 +2376,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
 			rate_ns >>= 1;
 			rate_shft--;
 		}
-	} /* else unlimited, ie. zero delay */
-
-	b->tin_rate_bps  = rate;
+	} else {
+		/* else unlimited, ie. zero delay */
+		WRITE_ONCE(b->flow_quantum, 1514);
+	}
+	WRITE_ONCE(b->tin_rate_bps, rate);
 	b->tin_rate_ns   = rate_ns;
 	b->tin_rate_shft = rate_shft;
 
@@ -2350,10 +2389,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
 
 	byte_target_ns = (byte_target * rate_ns) >> rate_shft;
 
-	b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
-	b->cparams.interval = max(rtt_est_ns +
-				     b->cparams.target - target_ns,
-				     b->cparams.target * 2);
+	WRITE_ONCE(b->cparams.target,
+		   max((byte_target_ns * 3) / 2, target_ns));
+	WRITE_ONCE(b->cparams.interval,
+		   max(rtt_est_ns + b->cparams.target - target_ns,
+		       b->cparams.target * 2));
 	b->cparams.mtu_time = byte_target_ns;
 	b->cparams.p_inc = 1 << 24; /* 1/256 */
 	b->cparams.p_dec = 1 << 20; /* 1/4096 */
@@ -2611,25 +2651,27 @@ static void cake_reconfigure(struct Qdisc *sch)
 {
 	struct cake_sched_data *qd = qdisc_priv(sch);
 	struct cake_sched_config *q = qd->config;
+	u32 buffer_limit;
 
 	cake_configure_rates(sch, qd->config->rate_bps, false);
 
 	if (q->buffer_config_limit) {
-		qd->buffer_limit = q->buffer_config_limit;
+		buffer_limit = q->buffer_config_limit;
 	} else if (q->rate_bps) {
 		u64 t = q->rate_bps * q->interval;
 
 		do_div(t, USEC_PER_SEC / 4);
-		qd->buffer_limit = max_t(u32, t, 4U << 20);
+		buffer_limit = max_t(u32, t, 4U << 20);
 	} else {
-		qd->buffer_limit = ~0;
+		buffer_limit = ~0;
 	}
 
 	sch->flags &= ~TCQ_F_CAN_BYPASS;
 
-	qd->buffer_limit = min(qd->buffer_limit,
-			       max(sch->limit * psched_mtu(qdisc_dev(sch)),
-				   q->buffer_config_limit));
+	WRITE_ONCE(qd->buffer_limit,
+		   min(buffer_limit,
+		       max(sch->limit * psched_mtu(qdisc_dev(sch)),
+			   q->buffer_config_limit)));
 }
 
 static int cake_config_change(struct cake_sched_config *q, struct nlattr *opt,
@@ -2774,10 +2816,10 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 		return ret;
 
 	if (overhead_changed) {
-		qd->max_netlen = 0;
-		qd->max_adjlen = 0;
-		qd->min_netlen = ~0;
-		qd->min_adjlen = ~0;
+		WRITE_ONCE(qd->max_netlen, 0);
+		WRITE_ONCE(qd->max_adjlen, 0);
+		WRITE_ONCE(qd->min_netlen, ~0);
+		WRITE_ONCE(qd->min_adjlen, ~0);
 	}
 
 	if (qd->tins) {
@@ -2995,15 +3037,15 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 			goto nla_put_failure;			       \
 	} while (0)
 
-	PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
-	PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
-	PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
-	PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
-	PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
-	PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
-	PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
-	PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
-	PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues);
+	PUT_STAT_U64(CAPACITY_ESTIMATE64, READ_ONCE(q->avg_peak_bandwidth));
+	PUT_STAT_U32(MEMORY_LIMIT, READ_ONCE(q->buffer_limit));
+	PUT_STAT_U32(MEMORY_USED, READ_ONCE(q->buffer_max_used));
+	PUT_STAT_U32(AVG_NETOFF, ((READ_ONCE(q->avg_netoff) + 0x8000) >> 16));
+	PUT_STAT_U32(MAX_NETLEN, READ_ONCE(q->max_netlen));
+	PUT_STAT_U32(MAX_ADJLEN, READ_ONCE(q->max_adjlen));
+	PUT_STAT_U32(MIN_NETLEN, READ_ONCE(q->min_netlen));
+	PUT_STAT_U32(MIN_ADJLEN, READ_ONCE(q->min_adjlen));
+	PUT_STAT_U32(ACTIVE_QUEUES, READ_ONCE(q->active_queues));
 
 #undef PUT_STAT_U32
 #undef PUT_STAT_U64
@@ -3029,38 +3071,38 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		if (!ts)
 			goto nla_put_failure;
 
-		PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
-		PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
-		PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
+		PUT_TSTAT_U64(THRESHOLD_RATE64, READ_ONCE(b->tin_rate_bps));
+		PUT_TSTAT_U64(SENT_BYTES64, READ_ONCE(b->bytes));
+		PUT_TSTAT_U32(BACKLOG_BYTES, READ_ONCE(b->tin_backlog));
 
 		PUT_TSTAT_U32(TARGET_US,
-			      ktime_to_us(ns_to_ktime(b->cparams.target)));
+			ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.target))));
 		PUT_TSTAT_U32(INTERVAL_US,
-			      ktime_to_us(ns_to_ktime(b->cparams.interval)));
+			ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.interval))));
 
-		PUT_TSTAT_U32(SENT_PACKETS, b->packets);
-		PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
-		PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
-		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
+		PUT_TSTAT_U32(SENT_PACKETS, READ_ONCE(b->packets));
+		PUT_TSTAT_U32(DROPPED_PACKETS, READ_ONCE(b->tin_dropped));
+		PUT_TSTAT_U32(ECN_MARKED_PACKETS, READ_ONCE(b->tin_ecn_mark));
+		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, READ_ONCE(b->ack_drops));
 
 		PUT_TSTAT_U32(PEAK_DELAY_US,
-			      ktime_to_us(ns_to_ktime(b->peak_delay)));
+			      ktime_to_us(ns_to_ktime(READ_ONCE(b->peak_delay))));
 		PUT_TSTAT_U32(AVG_DELAY_US,
-			      ktime_to_us(ns_to_ktime(b->avge_delay)));
+			      ktime_to_us(ns_to_ktime(READ_ONCE(b->avge_delay))));
 		PUT_TSTAT_U32(BASE_DELAY_US,
-			      ktime_to_us(ns_to_ktime(b->base_delay)));
+			      ktime_to_us(ns_to_ktime(READ_ONCE(b->base_delay))));
 
-		PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
-		PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
-		PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
+		PUT_TSTAT_U32(WAY_INDIRECT_HITS, READ_ONCE(b->way_hits));
+		PUT_TSTAT_U32(WAY_MISSES, READ_ONCE(b->way_misses));
+		PUT_TSTAT_U32(WAY_COLLISIONS, READ_ONCE(b->way_collisions));
 
-		PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
-					    b->decaying_flow_count);
-		PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
-		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
-		PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
+		PUT_TSTAT_U32(SPARSE_FLOWS, READ_ONCE(b->sparse_flow_count) +
+					    READ_ONCE(b->decaying_flow_count));
+		PUT_TSTAT_U32(BULK_FLOWS, READ_ONCE(b->bulk_flow_count));
+		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, READ_ONCE(b->unresponsive_flow_count));
+		PUT_TSTAT_U32(MAX_SKBLEN, READ_ONCE(b->max_skblen));
 
-		PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
+		PUT_TSTAT_U32(FLOW_QUANTUM, READ_ONCE(b->flow_quantum));
 		nla_nest_end(d->skb, ts);
 	}
 
@@ -3128,7 +3170,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 
 		flow = &b->flows[idx % CAKE_QUEUES];
 
-		if (flow->head) {
+		if (READ_ONCE(flow->head)) {
 			sch_tree_lock(sch);
 			skb = flow->head;
 			while (skb) {
@@ -3137,13 +3179,15 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			}
 			sch_tree_unlock(sch);
 		}
-		qs.backlog = b->backlogs[idx % CAKE_QUEUES];
-		qs.drops = flow->dropped;
+		qs.backlog = READ_ONCE(b->backlogs[idx % CAKE_QUEUES]);
+		qs.drops = READ_ONCE(flow->dropped);
 	}
 	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
 		return -1;
 	if (flow) {
 		ktime_t now = ktime_get();
+		bool dropping;
+		u32 p_drop;
 
 		stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
 		if (!stats)
@@ -3158,21 +3202,23 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			goto nla_put_failure;			       \
 	} while (0)
 
-		PUT_STAT_S32(DEFICIT, flow->deficit);
-		PUT_STAT_U32(DROPPING, flow->cvars.dropping);
-		PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
-		PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
-		if (flow->cvars.p_drop) {
+		PUT_STAT_S32(DEFICIT, READ_ONCE(flow->deficit));
+		dropping = READ_ONCE(flow->cvars.dropping);
+		PUT_STAT_U32(DROPPING, dropping);
+		PUT_STAT_U32(COBALT_COUNT, READ_ONCE(flow->cvars.count));
+		p_drop = READ_ONCE(flow->cvars.p_drop);
+		PUT_STAT_U32(P_DROP, p_drop);
+		if (p_drop) {
 			PUT_STAT_S32(BLUE_TIMER_US,
 				     ktime_to_us(
 					     ktime_sub(now,
-						       flow->cvars.blue_timer)));
+						       READ_ONCE(flow->cvars.blue_timer))));
 		}
-		if (flow->cvars.dropping) {
+		if (dropping) {
 			PUT_STAT_S32(DROP_NEXT_US,
 				     ktime_to_us(
 					     ktime_sub(now,
-						       flow->cvars.drop_next)));
+						       READ_ONCE(flow->cvars.drop_next))));
 		}
 
 		if (nla_nest_end(d->skb, stats) < 0)
@@ -3298,10 +3344,10 @@ static int cake_mq_change(struct Qdisc *sch, struct nlattr *opt,
 		struct cake_sched_data *qd = qdisc_priv(chld);
 
 		if (overhead_changed) {
-			qd->max_netlen = 0;
-			qd->max_adjlen = 0;
-			qd->min_netlen = ~0;
-			qd->min_adjlen = ~0;
+			WRITE_ONCE(qd->max_netlen, 0);
+			WRITE_ONCE(qd->max_adjlen, 0);
+			WRITE_ONCE(qd->min_netlen, ~0);
+			WRITE_ONCE(qd->min_adjlen, ~0);
 		}
 
 		if (qd->tins) {
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* Re: [ovs-dev] [PATCH net-next v2] net: openvswitch: decouple flow_table from ovs_mutex
From: Aaron Conole @ 2026-04-10 18:52 UTC (permalink / raw)
  To: Adrian Moreno via dev
  Cc: netdev, Adrian Moreno, open list:OPENVSWITCH, Paolo Abeni,
	open list, Ilya Maximets, Eric Dumazet, Simon Horman,
	Jakub Kicinski, David S. Miller
In-Reply-To: <20260407120418.356718-1-amorenoz@redhat.com>

Hi Adrian,

Thanks for the patch.  A few questions inline.

Adrian Moreno via dev <ovs-dev@openvswitch.org> writes:

> Currently the entire ovs module is write-protected using the global
> ovs_mutex. While this simple approach works fine for control-plane
> operations (such as vport configurations), requiring the global mutex
> for flow modifications can be problematic.
>
> During periods of high control-plane operations, e.g: netdevs (vports)
> coming and going, RTNL can suffer contention. This contention is easily
> transferred to the ovs_mutex as RTNL nests inside ovs_mutex. Flow
> modifications, however, are done as part of packet processing and having
> them wait for RTNL pressure to go away can lead to packet drops.
>
> This patch decouples flow_table modifications from ovs_mutex by means of
> the following:
>
> 1 - Make flow_table an rcu-protected pointer inside the datapath.
> This allows both objects to be protected independently while reducing the
> amount of changes required in "flow_table.c".
>
> 2 - Create a new mutex inside the flow_table that protects it from
> concurrent modifications.
> Putting the mutex inside flow_table makes it easier to consume for
> functions inside flow_table.c that do not currently take pointers to the
> datapath.
> Some function signatures need to be changed to accept flow_table so that
> lockdep checks can be performed.
>
> 3 - Create a reference count to temporarily extend rcu protection from
> the datapath to the flow_table.
> In order to use the flow_table without locking ovs_mutex, the flow_table
> pointer must be first dereferenced within an rcu-protected region.
> Next, the table->mutex needs to be locked to protect it from
> concurrent writes but mutexes must not be locked inside an rcu-protected
> region, so the rcu-protected region must be left at which point the
> datapath can be concurrently freed.
> To extend the protection beyond the rcu region, a reference count is used.
> One reference is held by the datapath, the other is temporarily
> increased during flow modifications. For example:
>
> Datapath deletion:
>
>   ovs_lock();
>   table = rcu_dereference_protected(dp->table, ...);
>   rcu_assign_pointer(dp->table, NULL);
>   ovs_flow_tbl_put(table);
>   ovs_unlock();

I guess it's now possible for flow operations to succeed on
'removed-but-not-yet-freed' tables.  That's probably worth documenting
somewhere, since it is a slight behavior change.  More below.

> Flow modification:
>
>   rcu_read_lock();
>   dp = get_dp(...);
>   table = rcu_dereference(dp->table);
>   ovs_flow_tbl_get(table);
>   rcu_read_unlock();
>
>   mutex_lock(&table->lock);
>   /* Perform modifications on the flow_table */
>   mutex_unlock(&table->lock);
>   ovs_flow_tbl_put(table);
>
> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
> ---
> v2: Fix argument in ovs_flow_tbl_put (sparse)
>     Remove rcu checks in ovs_dp_masks_rebalance
> ---
>  net/openvswitch/datapath.c   | 285 ++++++++++++++++++++++++-----------
>  net/openvswitch/datapath.h   |   2 +-
>  net/openvswitch/flow.c       |  13 +-
>  net/openvswitch/flow.h       |   9 +-
>  net/openvswitch/flow_table.c | 180 ++++++++++++++--------
>  net/openvswitch/flow_table.h |  51 ++++++-
>  6 files changed, 380 insertions(+), 160 deletions(-)
>
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index e209099218b4..9c234993520c 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -88,13 +88,17 @@ static void ovs_notify(struct genl_family *family,
>   * DOC: Locking:
>   *
>   * All writes e.g. Writes to device state (add/remove datapath, port, set
> - * operations on vports, etc.), Writes to other state (flow table
> - * modifications, set miscellaneous datapath parameters, etc.) are protected
> - * by ovs_lock.
> + * operations on vports, etc.) and writes to other datapath parameters
> + * are protected by ovs_lock.
> + *
> + * Writes to the flow table are NOT protected by ovs_lock. Instead, a per-table
> + * mutex and reference count are used (see comment above "struct flow_table"
> + * definition). On some few occasions, the per-flow table mutex is nested
> + * inside ovs_mutex.
>   *
>   * Reads are protected by RCU.
>   *
> - * There are a few special cases (mostly stats) that have their own
> + * There are a few other special cases (mostly stats) that have their own
>   * synchronization but they nest under all of above and don't interact with
>   * each other.
>   *
> @@ -166,7 +170,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
>  {
>  	struct datapath *dp = container_of(rcu, struct datapath, rcu);
>  
> -	ovs_flow_tbl_destroy(&dp->table);
>  	free_percpu(dp->stats_percpu);
>  	kfree(dp->ports);
>  	ovs_meters_exit(dp);
> @@ -247,6 +250,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>  	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
>  	const struct vport *p = OVS_CB(skb)->input_vport;
>  	struct datapath *dp = p->dp;
> +	struct flow_table *table;
>  	struct sw_flow *flow;
>  	struct sw_flow_actions *sf_acts;
>  	struct dp_stats_percpu *stats;
> @@ -257,9 +261,16 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>  	int error;
>  
>  	stats = this_cpu_ptr(dp->stats_percpu);
> +	table = rcu_dereference(dp->table);
> +	if (!table) {
> +		net_dbg_ratelimited("ovs: no flow table on datapath %s\n",
> +				    ovs_dp_name(dp));
> +		kfree_skb(skb);
> +		return;
> +	}
>  
>  	/* Look up flow. */
> -	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
> +	flow = ovs_flow_tbl_lookup_stats(table, key, skb_get_hash(skb),
>  					 &n_mask_hit, &n_cache_hit);
>  	if (unlikely(!flow)) {
>  		struct dp_upcall_info upcall;
> @@ -752,12 +763,16 @@ static struct genl_family dp_packet_genl_family __ro_after_init = {
>  static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
>  			 struct ovs_dp_megaflow_stats *mega_stats)
>  {
> +	struct flow_table *table = ovsl_dereference(dp->table);
>  	int i;
>  
>  	memset(mega_stats, 0, sizeof(*mega_stats));
>  
> -	stats->n_flows = ovs_flow_tbl_count(&dp->table);
> -	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
> +	if (table) {
> +		stats->n_flows = ovs_flow_tbl_count(table);

Previously, when calling this we'd be under the ovs_mutex and the read
on table->count would be somewhat coherent (for some definition of
coherent).  BUT we are now doing a bare read.  I'm not sure if we should
take the table lock here, or at least annotate the access (READ_ONCE
here, and WRITE_ONCE at the sites that update the count)?  WDYT?

> +		mega_stats->n_masks = ovs_flow_tbl_num_masks(table);
> +	}
> +
>  
>  	stats->n_hit = stats->n_missed = stats->n_lost = 0;
>  
> @@ -829,15 +844,16 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
>  		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
>  }
>  
> -/* Called with ovs_mutex or RCU read lock. */
> +/* Called with table->lock or RCU read lock. */
>  static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
> +				   const struct flow_table *table,
>  				   struct sk_buff *skb)
>  {
>  	struct ovs_flow_stats stats;
>  	__be16 tcp_flags;
>  	unsigned long used;
>  
> -	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
> +	ovs_flow_stats_get(flow, table, &stats, &used, &tcp_flags);
>  
>  	if (used &&
>  	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
> @@ -857,8 +873,9 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
>  	return 0;
>  }
>  
> -/* Called with ovs_mutex or RCU read lock. */
> +/* Called with RCU read lock or table->lock held. */
>  static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
> +				     const struct flow_table *table,
>  				     struct sk_buff *skb, int skb_orig_len)
>  {
>  	struct nlattr *start;
> @@ -878,7 +895,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
>  	if (start) {
>  		const struct sw_flow_actions *sf_acts;
>  
> -		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
> +		sf_acts = rcu_dereference_ovs_tbl(flow->sf_acts, table);
>  		err = ovs_nla_put_actions(sf_acts->actions,
>  					  sf_acts->actions_len, skb);
>  
> @@ -897,8 +914,10 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
>  	return 0;
>  }
>  
> -/* Called with ovs_mutex or RCU read lock. */
> -static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
> +/* Called with table->lock or RCU read lock. */
> +static int ovs_flow_cmd_fill_info(const struct sw_flow *flow,
> +				  const struct flow_table *table,
> +				  int dp_ifindex,
>  				  struct sk_buff *skb, u32 portid,
>  				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
>  {
> @@ -929,12 +948,12 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
>  			goto error;
>  	}
>  
> -	err = ovs_flow_cmd_fill_stats(flow, skb);
> +	err = ovs_flow_cmd_fill_stats(flow, table, skb);
>  	if (err)
>  		goto error;
>  
>  	if (should_fill_actions(ufid_flags)) {
> -		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
> +		err = ovs_flow_cmd_fill_actions(flow, table, skb, skb_orig_len);
>  		if (err)
>  			goto error;
>  	}
> @@ -968,8 +987,9 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
>  	return skb;
>  }
>  
> -/* Called with ovs_mutex. */
> +/* Called with table->lock. */
>  static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
> +					       const struct flow_table *table,
>  					       int dp_ifindex,
>  					       struct genl_info *info, u8 cmd,
>  					       bool always, u32 ufid_flags)
> @@ -977,12 +997,12 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
>  	struct sk_buff *skb;
>  	int retval;
>  
> -	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
> +	skb = ovs_flow_cmd_alloc_info(ovs_tbl_dereference(flow->sf_acts, table),
>  				      &flow->id, info, always, ufid_flags);
>  	if (IS_ERR_OR_NULL(skb))
>  		return skb;
>  
> -	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
> +	retval = ovs_flow_cmd_fill_info(flow, table, dp_ifindex, skb,
>  					info->snd_portid, info->snd_seq, 0,
>  					cmd, ufid_flags);
>  	if (WARN_ON_ONCE(retval < 0)) {
> @@ -998,6 +1018,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
>  	struct sw_flow *flow = NULL, *new_flow;
> +	struct flow_table *table;
>  	struct sw_flow_mask mask;
>  	struct sk_buff *reply;
>  	struct datapath *dp;
> @@ -1064,30 +1085,43 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  		goto err_kfree_acts;
>  	}
>  

I think this can lead to a weird(?) behavior:

thread A (dp_destroy):                   thread B (ovs_flow_cmd_new):
rcu_assign_pointer(dp->table, NULL)
                                         rcu_read_lock();
                                         table =
                                         rcu_dereference(dp->table);
                                           [old table]
                                         ovs_flow_tbl_get(table)
                                             //refcnt change
                                         rcu_read_unlock()
ovs_flow_tbl_put(table) // refcnt chg
                                         mutex_lock(table->lock)
                                         ovs_flow_tbl_insert(...)
                                         [success reply]
                                         mutex_unlock(table->lock)
                                         ovs_flow_tbl_put(table)
                                         // table flow flush, etc.

I guess it isn't a huge deal (installing a flow while deleting the table
would be weird from a userspace perspective), and I think it is safe, but
it is worth mentioning that we can have such a scenario now.

> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(net, ovs_header->dp_ifindex);
>  	if (unlikely(!dp)) {
>  		error = -ENODEV;
> -		goto err_unlock_ovs;
> +		rcu_read_unlock();
> +		goto err_kfree_reply;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		error = -ENODEV;
> +		rcu_read_unlock();
> +		goto err_kfree_reply;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
>  
>  	/* Check if this is a duplicate flow */
>  	if (ovs_identifier_is_ufid(&new_flow->id))
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &new_flow->id);
>  	if (!flow)
> -		flow = ovs_flow_tbl_lookup(&dp->table, key);
> +		flow = ovs_flow_tbl_lookup(table, key);
>  	if (likely(!flow)) {
>  		rcu_assign_pointer(new_flow->sf_acts, acts);
>  
>  		/* Put flow in bucket. */
> -		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
> +		error = ovs_flow_tbl_insert(table, new_flow, &mask);
>  		if (unlikely(error)) {
>  			acts = NULL;
> -			goto err_unlock_ovs;
> +			goto err_unlock_tbl;
>  		}
>  
>  		if (unlikely(reply)) {
> -			error = ovs_flow_cmd_fill_info(new_flow,
> +			error = ovs_flow_cmd_fill_info(new_flow, table,
>  						       ovs_header->dp_ifindex,
>  						       reply, info->snd_portid,
>  						       info->snd_seq, 0,
> @@ -1095,7 +1129,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  						       ufid_flags);
>  			BUG_ON(error < 0);
>  		}
> -		ovs_unlock();
> +		mutex_unlock(&table->lock);
> +		ovs_flow_tbl_put(table);
>  	} else {
>  		struct sw_flow_actions *old_acts;
>  
> @@ -1108,28 +1143,28 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
>  							 | NLM_F_EXCL))) {
>  			error = -EEXIST;
> -			goto err_unlock_ovs;
> +			goto err_unlock_tbl;
>  		}
>  		/* The flow identifier has to be the same for flow updates.
>  		 * Look for any overlapping flow.
>  		 */
>  		if (unlikely(!ovs_flow_cmp(flow, &match))) {
>  			if (ovs_identifier_is_key(&flow->id))
> -				flow = ovs_flow_tbl_lookup_exact(&dp->table,
> +				flow = ovs_flow_tbl_lookup_exact(table,
>  								 &match);
>  			else /* UFID matches but key is different */
>  				flow = NULL;
>  			if (!flow) {
>  				error = -ENOENT;
> -				goto err_unlock_ovs;
> +				goto err_unlock_tbl;
>  			}
>  		}
>  		/* Update actions. */
> -		old_acts = ovsl_dereference(flow->sf_acts);
> +		old_acts = ovs_tbl_dereference(flow->sf_acts, table);
>  		rcu_assign_pointer(flow->sf_acts, acts);
>  
>  		if (unlikely(reply)) {
> -			error = ovs_flow_cmd_fill_info(flow,
> +			error = ovs_flow_cmd_fill_info(flow, table,
>  						       ovs_header->dp_ifindex,
>  						       reply, info->snd_portid,
>  						       info->snd_seq, 0,
> @@ -1137,7 +1172,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  						       ufid_flags);
>  			BUG_ON(error < 0);
>  		}
> -		ovs_unlock();
> +		mutex_unlock(&table->lock);
> +		ovs_flow_tbl_put(table);
>  
>  		ovs_nla_free_flow_actions_rcu(old_acts);
>  		ovs_flow_free(new_flow, false);
> @@ -1149,8 +1185,10 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	kfree(key);
>  	return 0;
>  
> -err_unlock_ovs:
> -	ovs_unlock();
> +err_unlock_tbl:
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
> +err_kfree_reply:
>  	kfree_skb(reply);
>  err_kfree_acts:
>  	ovs_nla_free_flow_actions(acts);
> @@ -1244,6 +1282,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  	struct net *net = sock_net(skb->sk);
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
> +	struct flow_table *table;
>  	struct sw_flow_key key;
>  	struct sw_flow *flow;
>  	struct sk_buff *reply = NULL;
> @@ -1278,29 +1317,43 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  		}
>  	}
>  
> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(net, ovs_header->dp_ifindex);
>  	if (unlikely(!dp)) {
>  		error = -ENODEV;
> -		goto err_unlock_ovs;
> +		rcu_read_unlock();
> +		goto err_free_reply;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		rcu_read_unlock();
> +		error = -ENODEV;
> +		goto err_free_reply;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
> +
>  	/* Check that the flow exists. */
>  	if (ufid_present)
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &sfid);
>  	else
> -		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
> +		flow = ovs_flow_tbl_lookup_exact(table, &match);
>  	if (unlikely(!flow)) {
>  		error = -ENOENT;
> -		goto err_unlock_ovs;
> +		goto err_unlock_tbl;
>  	}
>  
>  	/* Update actions, if present. */
>  	if (likely(acts)) {
> -		old_acts = ovsl_dereference(flow->sf_acts);
> +		old_acts = ovs_tbl_dereference(flow->sf_acts, table);
>  		rcu_assign_pointer(flow->sf_acts, acts);
>  
>  		if (unlikely(reply)) {
> -			error = ovs_flow_cmd_fill_info(flow,
> +			error = ovs_flow_cmd_fill_info(flow, table,
>  						       ovs_header->dp_ifindex,
>  						       reply, info->snd_portid,
>  						       info->snd_seq, 0,
> @@ -1310,20 +1363,22 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  		}
>  	} else {
>  		/* Could not alloc without acts before locking. */
> -		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
> +		reply = ovs_flow_cmd_build_info(flow, table,
> +						ovs_header->dp_ifindex,
>  						info, OVS_FLOW_CMD_SET, false,
>  						ufid_flags);
>  
>  		if (IS_ERR(reply)) {
>  			error = PTR_ERR(reply);
> -			goto err_unlock_ovs;
> +			goto err_unlock_tbl;
>  		}
>  	}
>  
>  	/* Clear stats. */
>  	if (a[OVS_FLOW_ATTR_CLEAR])
> -		ovs_flow_stats_clear(flow);
> -	ovs_unlock();
> +		ovs_flow_stats_clear(flow, table);
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  
>  	if (reply)
>  		ovs_notify(&dp_flow_genl_family, reply, info);
> @@ -1332,8 +1387,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  
>  	return 0;
>  
> -err_unlock_ovs:
> -	ovs_unlock();
> +err_unlock_tbl:
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
> +err_free_reply:
>  	kfree_skb(reply);
>  err_kfree_acts:
>  	ovs_nla_free_flow_actions(acts);
> @@ -1346,6 +1403,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
>  	struct net *net = sock_net(skb->sk);
> +	struct flow_table *table;
>  	struct sw_flow_key key;
>  	struct sk_buff *reply;
>  	struct sw_flow *flow;
> @@ -1370,33 +1428,48 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
>  	if (err)
>  		return err;
>  
> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
>  	if (!dp) {
> -		err = -ENODEV;
> -		goto unlock;
> +		rcu_read_unlock();
> +		return -ENODEV;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		rcu_read_unlock();
> +		return -ENODEV;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
> +
>  
>  	if (ufid_present)
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &ufid);
>  	else
> -		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
> +		flow = ovs_flow_tbl_lookup_exact(table, &match);
>  	if (!flow) {
>  		err = -ENOENT;
>  		goto unlock;
>  	}
>  
> -	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
> -					OVS_FLOW_CMD_GET, true, ufid_flags);
> +	reply = ovs_flow_cmd_build_info(flow, table, ovs_header->dp_ifindex,
> +					info, OVS_FLOW_CMD_GET, true,
> +					ufid_flags);
>  	if (IS_ERR(reply)) {
>  		err = PTR_ERR(reply);
>  		goto unlock;
>  	}
>  
> -	ovs_unlock();
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  	return genlmsg_reply(reply, info);
>  unlock:
> -	ovs_unlock();
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  	return err;
>  }
>  
> @@ -1405,6 +1478,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
>  	struct net *net = sock_net(skb->sk);
> +	struct flow_table *table;
>  	struct sw_flow_key key;
>  	struct sk_buff *reply;
>  	struct sw_flow *flow = NULL;
> @@ -1425,36 +1499,49 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
>  			return err;
>  	}
>  
> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
>  	if (unlikely(!dp)) {
> -		err = -ENODEV;
> -		goto unlock;
> +		rcu_read_unlock();
> +		return -ENODEV;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		rcu_read_unlock();
> +		return -ENODEV;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
> +
>  
>  	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
> -		err = ovs_flow_tbl_flush(&dp->table);
> +		err = ovs_flow_tbl_flush(table);
>  		goto unlock;
>  	}
>  
>  	if (ufid_present)
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &ufid);
>  	else
> -		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
> +		flow = ovs_flow_tbl_lookup_exact(table, &match);
>  	if (unlikely(!flow)) {
>  		err = -ENOENT;
>  		goto unlock;
>  	}
>  
> -	ovs_flow_tbl_remove(&dp->table, flow);
> -	ovs_unlock();
> +	ovs_flow_tbl_remove(table, flow);
> +	mutex_unlock(&table->lock);
>  
>  	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
>  					&flow->id, info, false, ufid_flags);
>  	if (likely(reply)) {
>  		if (!IS_ERR(reply)) {
>  			rcu_read_lock();	/*To keep RCU checker happy. */
> -			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
> +			err = ovs_flow_cmd_fill_info(flow, table,
> +						     ovs_header->dp_ifindex,
>  						     reply, info->snd_portid,
>  						     info->snd_seq, 0,
>  						     OVS_FLOW_CMD_DEL,
> @@ -1473,10 +1560,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
>  	}
>  
>  out_free:
> +	ovs_flow_tbl_put(table);
>  	ovs_flow_free(flow, true);
>  	return 0;
>  unlock:
> -	ovs_unlock();
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  	return err;
>  }
>  
> @@ -1485,6 +1574,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
>  	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
>  	struct table_instance *ti;
> +	struct flow_table *table;
>  	struct datapath *dp;
>  	u32 ufid_flags;
>  	int err;
> @@ -1501,8 +1591,13 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  		rcu_read_unlock();
>  		return -ENODEV;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table) {
> +		rcu_read_unlock();
> +		return -ENODEV;
> +	}
>  
> -	ti = rcu_dereference(dp->table.ti);
> +	ti = rcu_dereference(table->ti);
>  	for (;;) {
>  		struct sw_flow *flow;
>  		u32 bucket, obj;
> @@ -1513,8 +1608,8 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  		if (!flow)
>  			break;
>  
> -		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
> -					   NETLINK_CB(cb->skb).portid,
> +		if (ovs_flow_cmd_fill_info(flow, table, ovs_header->dp_ifindex,
> +					   skb, NETLINK_CB(cb->skb).portid,
>  					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
>  					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
>  			break;
> @@ -1598,8 +1693,13 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
>  	struct ovs_dp_stats dp_stats;
>  	struct ovs_dp_megaflow_stats dp_megaflow_stats;
>  	struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
> +	struct flow_table *table;
>  	int err, pids_len;
>  
> +	table = ovsl_dereference(dp->table);
> +	if (!table)
> +		return -ENODEV;
> +
>  	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
>  				 flags, cmd);
>  	if (!ovs_header)
> @@ -1625,7 +1725,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
>  		goto nla_put_failure;
>  
>  	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
> -			ovs_flow_tbl_masks_cache_size(&dp->table)))
> +			ovs_flow_tbl_masks_cache_size(table)))
>  		goto nla_put_failure;
>  
>  	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
> @@ -1736,6 +1836,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
>  static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
>  {
>  	u32 user_features = 0, old_features = dp->user_features;
> +	struct flow_table *table;
>  	int err;
>  
>  	if (a[OVS_DP_ATTR_USER_FEATURES]) {
> @@ -1757,8 +1858,12 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
>  		int err;
>  		u32 cache_size;
>  
> +		table = ovsl_dereference(dp->table);
> +		if (!table)
> +			return -ENODEV;
> +
>  		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
> -		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
> +		err = ovs_flow_tbl_masks_cache_resize(table, cache_size);
>  		if (err)
>  			return err;
>  	}
> @@ -1810,6 +1915,7 @@ static int ovs_dp_vport_init(struct datapath *dp)
>  static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  {
>  	struct nlattr **a = info->attrs;
> +	struct flow_table *table;
>  	struct vport_parms parms;
>  	struct sk_buff *reply;
>  	struct datapath *dp;
> @@ -1833,9 +1939,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	ovs_dp_set_net(dp, sock_net(skb->sk));
>  
>  	/* Allocate table. */
> -	err = ovs_flow_tbl_init(&dp->table);
> -	if (err)
> +	table = ovs_flow_tbl_alloc();
> +	if (IS_ERR(table)) {
> +		err = PTR_ERR(table);
>  		goto err_destroy_dp;
> +	}
> +	rcu_assign_pointer(dp->table, table);
>  
>  	err = ovs_dp_stats_init(dp);
>  	if (err)
> @@ -1905,7 +2014,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  err_destroy_stats:
>  	free_percpu(dp->stats_percpu);
>  err_destroy_table:
> -	ovs_flow_tbl_destroy(&dp->table);
> +	ovs_flow_tbl_put(table);
>  err_destroy_dp:
>  	kfree(dp);
>  err_destroy_reply:
> @@ -1917,7 +2026,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  /* Called with ovs_mutex. */
>  static void __dp_destroy(struct datapath *dp)
>  {
> -	struct flow_table *table = &dp->table;
> +	struct flow_table *table = rcu_dereference_protected(dp->table,
> +					lockdep_ovsl_is_held());
>  	int i;
>  
>  	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
> @@ -1939,14 +2049,10 @@ static void __dp_destroy(struct datapath *dp)
>  	 */
>  	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
>  
> -	/* Flush sw_flow in the tables. RCU cb only releases resource
> -	 * such as dp, ports and tables. That may avoid some issues
> -	 * such as RCU usage warning.
> -	 */
> -	table_instance_flow_flush(table, ovsl_dereference(table->ti),
> -				  ovsl_dereference(table->ufid_ti));
> +	rcu_assign_pointer(dp->table, NULL);
> +	ovs_flow_tbl_put(table);
>  
> -	/* RCU destroy the ports, meters and flow tables. */
> +	/* RCU destroy the ports and meters. */
>  	call_rcu(&dp->rcu, destroy_dp_rcu);
>  }
>  
> @@ -2554,13 +2660,18 @@ static void ovs_dp_masks_rebalance(struct work_struct *work)
>  {
>  	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
>  					       masks_rebalance.work);
> +	struct flow_table *table;
>  	struct datapath *dp;
>  
>  	ovs_lock();
> -
> -	list_for_each_entry(dp, &ovs_net->dps, list_node)
> -		ovs_flow_masks_rebalance(&dp->table);
> -
> +	list_for_each_entry(dp, &ovs_net->dps, list_node) {
> +		table = ovsl_dereference(dp->table);
> +		if (!table)
> +			continue;

Should we take a reference on the table here?  I guess it's kind of safe
because of the ovs_lock() above, but if that gets removed it's possible
someone misses that there isn't a refcnt pin here (whereas everywhere
else has an ovs_flow_tbl_get() before it).

> +		mutex_lock(&table->lock);
> +		ovs_flow_masks_rebalance(table);
> +		mutex_unlock(&table->lock);
> +	}
>  	ovs_unlock();
>  
>  	schedule_delayed_work(&ovs_net->masks_rebalance,
> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> index db0c3e69d66c..44773bf9f645 100644
> --- a/net/openvswitch/datapath.h
> +++ b/net/openvswitch/datapath.h
> @@ -90,7 +90,7 @@ struct datapath {
>  	struct list_head list_node;
>  
>  	/* Flow table. */
> -	struct flow_table table;
> +	struct flow_table __rcu *table;
>  
>  	/* Switch ports. */
>  	struct hlist_head *ports;
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index 66366982f604..0a748cf20f53 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -124,8 +124,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
>  	spin_unlock(&stats->lock);
>  }
>  
> -/* Must be called with rcu_read_lock or ovs_mutex. */
> +/* Must be called with rcu_read_lock or table->lock held. */
>  void ovs_flow_stats_get(const struct sw_flow *flow,
> +			const struct flow_table *table,
>  			struct ovs_flow_stats *ovs_stats,
>  			unsigned long *used, __be16 *tcp_flags)
>  {
> @@ -136,7 +137,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
>  	memset(ovs_stats, 0, sizeof(*ovs_stats));
>  
>  	for_each_cpu(cpu, flow->cpu_used_mask) {
> -		struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
> +		struct sw_flow_stats *stats =
> +			rcu_dereference_ovs_tbl(flow->stats[cpu], table);
>  
>  		if (stats) {
>  			/* Local CPU may write on non-local stats, so we must
> @@ -153,13 +155,14 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
>  	}
>  }
>  
> -/* Called with ovs_mutex. */
> -void ovs_flow_stats_clear(struct sw_flow *flow)
> +/* Called with table->lock held. */
> +void ovs_flow_stats_clear(struct sw_flow *flow, struct flow_table *table)
>  {
>  	unsigned int cpu;
>  
>  	for_each_cpu(cpu, flow->cpu_used_mask) {
> -		struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
> +		struct sw_flow_stats *stats =
> +			ovs_tbl_dereference(flow->stats[cpu], table);
>  
>  		if (stats) {
>  			spin_lock_bh(&stats->lock);
> diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
> index b5711aff6e76..e05ed6796e4e 100644
> --- a/net/openvswitch/flow.h
> +++ b/net/openvswitch/flow.h
> @@ -23,6 +23,7 @@
>  #include <net/dst_metadata.h>
>  #include <net/nsh.h>
>  
> +struct flow_table;
>  struct sk_buff;
>  
>  enum sw_flow_mac_proto {
> @@ -280,9 +281,11 @@ static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
>  
>  void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
>  			   const struct sk_buff *);
> -void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
> -			unsigned long *used, __be16 *tcp_flags);
> -void ovs_flow_stats_clear(struct sw_flow *);
> +void ovs_flow_stats_get(const struct sw_flow *flow,
> +			const struct flow_table *table,
> +			struct ovs_flow_stats *stats, unsigned long *used,
> +			__be16 *tcp_flags);
> +void ovs_flow_stats_clear(struct sw_flow *flow, struct flow_table *table);
>  u64 ovs_flow_used_time(unsigned long flow_jiffies);
>  
>  int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index 61c6a5f77c2e..d9dbe4b4807c 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -45,6 +45,16 @@
>  static struct kmem_cache *flow_cache;
>  struct kmem_cache *flow_stats_cache __read_mostly;
>  
> +#ifdef CONFIG_LOCKDEP
> +int lockdep_ovs_tbl_is_held(const struct flow_table *table)
> +{
> +	if (debug_locks)
> +		return lockdep_is_held(&table->lock);
> +	else
> +		return 1;
> +}
> +#endif
> +
>  static u16 range_n_bytes(const struct sw_flow_key_range *range)
>  {
>  	return range->end - range->start;
> @@ -249,12 +259,12 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>  	if (!new)
>  		return -ENOMEM;
>  
> -	old = ovsl_dereference(tbl->mask_array);
> +	old = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	if (old) {
>  		int i;
>  
>  		for (i = 0; i < old->max; i++) {
> -			if (ovsl_dereference(old->masks[i]))
> +			if (ovs_tbl_dereference(old->masks[i], tbl))
>  				new->masks[new->count++] = old->masks[i];
>  		}
>  		call_rcu(&old->rcu, mask_array_rcu_cb);
> @@ -268,7 +278,7 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>  static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  				   struct sw_flow_mask *new)
>  {
> -	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +	struct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	int err, ma_count = READ_ONCE(ma->count);
>  
>  	if (ma_count >= ma->max) {
> @@ -277,7 +287,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  		if (err)
>  			return err;
>  
> -		ma = ovsl_dereference(tbl->mask_array);
> +		ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	} else {
>  		/* On every add or delete we need to reset the counters so
>  		 * every new mask gets a fair chance of being prioritized.
> @@ -285,7 +295,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  		tbl_mask_array_reset_counters(ma);
>  	}
>  
> -	BUG_ON(ovsl_dereference(ma->masks[ma_count]));
> +	WARN_ON_ONCE(ovs_tbl_dereference(ma->masks[ma_count], tbl));
>  
>  	rcu_assign_pointer(ma->masks[ma_count], new);
>  	WRITE_ONCE(ma->count, ma_count + 1);
> @@ -296,12 +306,12 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  static void tbl_mask_array_del_mask(struct flow_table *tbl,
>  				    struct sw_flow_mask *mask)
>  {
> -	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +	struct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	int i, ma_count = READ_ONCE(ma->count);
>  
>  	/* Remove the deleted mask pointers from the array */
>  	for (i = 0; i < ma_count; i++) {
> -		if (mask == ovsl_dereference(ma->masks[i]))
> +		if (mask == ovs_tbl_dereference(ma->masks[i], tbl))
>  			goto found;
>  	}
>  
> @@ -329,10 +339,10 @@ static void tbl_mask_array_del_mask(struct flow_table *tbl,
>  static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>  {
>  	if (mask) {
> -		/* ovs-lock is required to protect mask-refcount and
> +		/* table lock is required to protect mask-refcount and
>  		 * mask list.
>  		 */
> -		ASSERT_OVSL();
> +		ASSERT_OVS_TBL(tbl);
>  		BUG_ON(!mask->ref_count);
>  		mask->ref_count--;
>  
> @@ -386,7 +396,8 @@ static struct mask_cache *tbl_mask_cache_alloc(u32 size)
>  }
>  int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>  {
> -	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
> +	struct mask_cache *mc = rcu_dereference_ovs_tbl(table->mask_cache,
> +							table);
>  	struct mask_cache *new;
>  
>  	if (size == mc->cache_size)
> @@ -406,15 +417,23 @@ int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>  	return 0;
>  }
>  
> -int ovs_flow_tbl_init(struct flow_table *table)
> +struct flow_table *ovs_flow_tbl_alloc(void)
>  {
>  	struct table_instance *ti, *ufid_ti;
> +	struct flow_table *table;
>  	struct mask_cache *mc;
>  	struct mask_array *ma;
>  
> +	table = kzalloc_obj(*table, GFP_KERNEL);
> +	if (!table)
> +		return ERR_PTR(-ENOMEM);
> +
> +	mutex_init(&table->lock);
> +	refcount_set(&table->refcnt, 1);
> +
>  	mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
>  	if (!mc)
> -		return -ENOMEM;
> +		goto free_table;
>  
>  	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
>  	if (!ma)
> @@ -435,7 +454,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>  	table->last_rehash = jiffies;
>  	table->count = 0;
>  	table->ufid_count = 0;
> -	return 0;
> +	return table;
>  
>  free_ti:
>  	__table_instance_destroy(ti);
> @@ -443,7 +462,10 @@ int ovs_flow_tbl_init(struct flow_table *table)
>  	__mask_array_destroy(ma);
>  free_mask_cache:
>  	__mask_cache_destroy(mc);
> -	return -ENOMEM;
> +free_table:
> +	mutex_destroy(&table->lock);
> +	kfree(table);
> +	return ERR_PTR(-ENOMEM);
>  }
>  
>  static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
> @@ -470,7 +492,7 @@ static void table_instance_flow_free(struct flow_table *table,
>  	flow_mask_remove(table, flow->mask);
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  void table_instance_flow_flush(struct flow_table *table,
>  			       struct table_instance *ti,
>  			       struct table_instance *ufid_ti)
> @@ -505,11 +527,11 @@ static void table_instance_destroy(struct table_instance *ti,
>  	call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
>  }
>  
> -/* No need for locking this function is called from RCU callback or
> - * error path.
> - */
> -void ovs_flow_tbl_destroy(struct flow_table *table)
> +/* No need for locking this function is called from RCU callback. */
> +static void ovs_flow_tbl_destroy_rcu(struct rcu_head *rcu)
>  {
> +	struct flow_table *table = container_of(rcu, struct flow_table, rcu);
> +
>  	struct table_instance *ti = rcu_dereference_raw(table->ti);
>  	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
>  	struct mask_cache *mc = rcu_dereference_raw(table->mask_cache);
> @@ -518,6 +540,20 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
>  	call_rcu(&mc->rcu, mask_cache_rcu_cb);
>  	call_rcu(&ma->rcu, mask_array_rcu_cb);
>  	table_instance_destroy(ti, ufid_ti);
> +	mutex_destroy(&table->lock);
> +	kfree(table);
> +}
> +
> +void ovs_flow_tbl_put(struct flow_table *table)
> +{
> +	if (refcount_dec_and_test(&table->refcnt)) {
> +		mutex_lock(&table->lock);
> +		table_instance_flow_flush(table,
> +					  ovs_tbl_dereference(table->ti, table),
> +					  ovs_tbl_dereference(table->ufid_ti, table));
> +		mutex_unlock(&table->lock);
> +		call_rcu(&table->rcu, ovs_flow_tbl_destroy_rcu);
> +	}
>  }
>  
>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
> @@ -571,7 +607,8 @@ static void ufid_table_instance_insert(struct table_instance *ti,
>  	hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
>  }
>  
> -static void flow_table_copy_flows(struct table_instance *old,
> +static void flow_table_copy_flows(struct flow_table *table,
> +				  struct table_instance *old,
>  				  struct table_instance *new, bool ufid)
>  {
>  	int old_ver;
> @@ -588,17 +625,18 @@ static void flow_table_copy_flows(struct table_instance *old,
>  		if (ufid)
>  			hlist_for_each_entry_rcu(flow, head,
>  						 ufid_table.node[old_ver],
> -						 lockdep_ovsl_is_held())
> +						 lockdep_ovs_tbl_is_held(table))
>  				ufid_table_instance_insert(new, flow);
>  		else
>  			hlist_for_each_entry_rcu(flow, head,
>  						 flow_table.node[old_ver],
> -						 lockdep_ovsl_is_held())
> +						 lockdep_ovs_tbl_is_held(table))
>  				table_instance_insert(new, flow);
>  	}
>  }
>  
> -static struct table_instance *table_instance_rehash(struct table_instance *ti,
> +static struct table_instance *table_instance_rehash(struct flow_table *table,
> +						    struct table_instance *ti,
>  						    int n_buckets, bool ufid)
>  {
>  	struct table_instance *new_ti;
> @@ -607,16 +645,19 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti,
>  	if (!new_ti)
>  		return NULL;
>  
> -	flow_table_copy_flows(ti, new_ti, ufid);
> +	flow_table_copy_flows(table, ti, new_ti, ufid);
>  
>  	return new_ti;
>  }
>  
> +/* Must be called with flow_table->lock held. */
>  int ovs_flow_tbl_flush(struct flow_table *flow_table)
>  {
>  	struct table_instance *old_ti, *new_ti;
>  	struct table_instance *old_ufid_ti, *new_ufid_ti;
>  
> +	ASSERT_OVS_TBL(flow_table);
> +
>  	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
>  	if (!new_ti)
>  		return -ENOMEM;
> @@ -624,8 +665,8 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
>  	if (!new_ufid_ti)
>  		goto err_free_ti;
>  
> -	old_ti = ovsl_dereference(flow_table->ti);
> -	old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
> +	old_ti = ovs_tbl_dereference(flow_table->ti, flow_table);
> +	old_ufid_ti = ovs_tbl_dereference(flow_table->ufid_ti, flow_table);
>  
>  	rcu_assign_pointer(flow_table->ti, new_ti);
>  	rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
> @@ -693,7 +734,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
>  	return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
>  }
>  
> -static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
> +static struct sw_flow *masked_flow_lookup(struct flow_table *tbl,
> +					  struct table_instance *ti,
>  					  const struct sw_flow_key *unmasked,
>  					  const struct sw_flow_mask *mask,
>  					  u32 *n_mask_hit)
> @@ -709,7 +751,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
>  	(*n_mask_hit)++;
>  
>  	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
> -				 lockdep_ovsl_is_held()) {
> +				 lockdep_ovs_tbl_is_held(tbl)) {
>  		if (flow->mask == mask && flow->flow_table.hash == hash &&
>  		    flow_cmp_masked_key(flow, &masked_key, &mask->range))
>  			return flow;
> @@ -736,9 +778,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>  	int i;
>  
>  	if (likely(*index < ma->max)) {
> -		mask = rcu_dereference_ovsl(ma->masks[*index]);
> +		mask = rcu_dereference_ovs_tbl(ma->masks[*index], tbl);
>  		if (mask) {
> -			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> +			flow = masked_flow_lookup(tbl, ti, key, mask, n_mask_hit);
>  			if (flow) {
>  				u64_stats_update_begin(&stats->syncp);
>  				stats->usage_cntrs[*index]++;
> @@ -754,11 +796,11 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>  		if (i == *index)
>  			continue;
>  
> -		mask = rcu_dereference_ovsl(ma->masks[i]);
> +		mask = rcu_dereference_ovs_tbl(ma->masks[i], tbl);
>  		if (unlikely(!mask))
>  			break;
>  
> -		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> +		flow = masked_flow_lookup(tbl, ti, key, mask, n_mask_hit);
>  		if (flow) { /* Found */
>  			*index = i;
>  			u64_stats_update_begin(&stats->syncp);
> @@ -845,8 +887,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>  struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
>  				    const struct sw_flow_key *key)
>  {
> -	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> -	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
> +	struct table_instance *ti = rcu_dereference_ovs_tbl(tbl->ti, tbl);
> +	struct mask_array *ma = rcu_dereference_ovs_tbl(tbl->mask_array, tbl);
>  	u32 __always_unused n_mask_hit;
>  	u32 __always_unused n_cache_hit;
>  	struct sw_flow *flow;
> @@ -865,21 +907,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
>  struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
>  					  const struct sw_flow_match *match)
>  {
> -	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +	struct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	int i;
>  
> -	/* Always called under ovs-mutex. */
> +	/* Always called under tbl->lock. */
>  	for (i = 0; i < ma->max; i++) {
> -		struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> +		struct table_instance *ti =
> +				rcu_dereference_ovs_tbl(tbl->ti, tbl);
>  		u32 __always_unused n_mask_hit;
>  		struct sw_flow_mask *mask;
>  		struct sw_flow *flow;
>  
> -		mask = ovsl_dereference(ma->masks[i]);
> +		mask = ovs_tbl_dereference(ma->masks[i], tbl);
>  		if (!mask)
>  			continue;
>  
> -		flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
> +		flow = masked_flow_lookup(tbl, ti, match->key, mask, &n_mask_hit);
>  		if (flow && ovs_identifier_is_key(&flow->id) &&
>  		    ovs_flow_cmp_unmasked_key(flow, match)) {
>  			return flow;
> @@ -915,7 +958,7 @@ bool ovs_flow_cmp(const struct sw_flow *flow,
>  struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  					 const struct sw_flow_id *ufid)
>  {
> -	struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
> +	struct table_instance *ti = rcu_dereference_ovs_tbl(tbl->ufid_ti, tbl);
>  	struct sw_flow *flow;
>  	struct hlist_head *head;
>  	u32 hash;
> @@ -923,7 +966,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  	hash = ufid_hash(ufid);
>  	head = find_bucket(ti, hash);
>  	hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
> -				 lockdep_ovsl_is_held()) {
> +				 lockdep_ovs_tbl_is_held(tbl)) {
>  		if (flow->ufid_table.hash == hash &&
>  		    ovs_flow_cmp_ufid(flow, ufid))
>  			return flow;
> @@ -933,28 +976,33 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  
>  int ovs_flow_tbl_num_masks(const struct flow_table *table)
>  {
> -	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
> +	struct mask_array *ma = rcu_dereference_ovs_tbl(table->mask_array,
> +							table);
>  	return READ_ONCE(ma->count);
>  }
>  
>  u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
>  {
> -	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
> +	struct mask_cache *mc = rcu_dereference_ovs_tbl(table->mask_cache,
> +							table);
>  
>  	return READ_ONCE(mc->cache_size);
>  }
>  
> -static struct table_instance *table_instance_expand(struct table_instance *ti,
> +static struct table_instance *table_instance_expand(struct flow_table *table,
> +						    struct table_instance *ti,
>  						    bool ufid)
>  {
> -	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
> +	return table_instance_rehash(table, ti, ti->n_buckets * 2, ufid);
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
>  {
> -	struct table_instance *ti = ovsl_dereference(table->ti);
> -	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
> +	struct table_instance *ti = ovs_tbl_dereference(table->ti,
> +							table);
> +	struct table_instance *ufid_ti = ovs_tbl_dereference(table->ufid_ti,
> +							     table);
>  
>  	BUG_ON(table->count == 0);
>  	table_instance_flow_free(table, ti, ufid_ti, flow);
> @@ -988,10 +1036,10 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
>  	struct mask_array *ma;
>  	int i;
>  
> -	ma = ovsl_dereference(tbl->mask_array);
> +	ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	for (i = 0; i < ma->max; i++) {
>  		struct sw_flow_mask *t;
> -		t = ovsl_dereference(ma->masks[i]);
> +		t = ovs_tbl_dereference(ma->masks[i], tbl);
>  
>  		if (t && mask_equal(mask, t))
>  			return t;
> @@ -1029,22 +1077,25 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>  	return 0;
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
>  {
>  	struct table_instance *new_ti = NULL;
>  	struct table_instance *ti;
>  
> +	ASSERT_OVS_TBL(table);
> +
>  	flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
> -	ti = ovsl_dereference(table->ti);
> +	ti = ovs_tbl_dereference(table->ti, table);
>  	table_instance_insert(ti, flow);
>  	table->count++;
>  
>  	/* Expand table, if necessary, to make room. */
>  	if (table->count > ti->n_buckets)
> -		new_ti = table_instance_expand(ti, false);
> +		new_ti = table_instance_expand(table, ti, false);
>  	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
> -		new_ti = table_instance_rehash(ti, ti->n_buckets, false);
> +		new_ti = table_instance_rehash(table, ti, ti->n_buckets,
> +					       false);
>  
>  	if (new_ti) {
>  		rcu_assign_pointer(table->ti, new_ti);
> @@ -1053,13 +1104,15 @@ static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
>  	}
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
>  {
>  	struct table_instance *ti;
>  
> +	ASSERT_OVS_TBL(table);
> +
>  	flow->ufid_table.hash = ufid_hash(&flow->id);
> -	ti = ovsl_dereference(table->ufid_ti);
> +	ti = ovs_tbl_dereference(table->ufid_ti, table);
>  	ufid_table_instance_insert(ti, flow);
>  	table->ufid_count++;
>  
> @@ -1067,7 +1120,7 @@ static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
>  	if (table->ufid_count > ti->n_buckets) {
>  		struct table_instance *new_ti;
>  
> -		new_ti = table_instance_expand(ti, true);
> +		new_ti = table_instance_expand(table, ti, true);
>  		if (new_ti) {
>  			rcu_assign_pointer(table->ufid_ti, new_ti);
>  			call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
> @@ -1075,12 +1128,14 @@ static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
>  	}
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
>  			const struct sw_flow_mask *mask)
>  {
>  	int err;
>  
> +	ASSERT_OVS_TBL(table);
> +
>  	err = flow_mask_insert(table, flow, mask);
>  	if (err)
>  		return err;
> @@ -1099,10 +1154,11 @@ static int compare_mask_and_count(const void *a, const void *b)
>  	return (s64)mc_b->counter - (s64)mc_a->counter;
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table->lock held. */
>  void ovs_flow_masks_rebalance(struct flow_table *table)
>  {
> -	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
> +	struct mask_array *ma = rcu_dereference_ovs_tbl(table->mask_array,
> +							table);
>  	struct mask_count *masks_and_count;
>  	struct mask_array *new;
>  	int masks_entries = 0;
> @@ -1117,7 +1173,7 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>  		struct sw_flow_mask *mask;
>  		int cpu;
>  
> -		mask = rcu_dereference_ovsl(ma->masks[i]);
> +		mask = rcu_dereference_ovs_tbl(ma->masks[i], table);
>  		if (unlikely(!mask))
>  			break;
>  
> @@ -1171,7 +1227,7 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>  	for (i = 0; i < masks_entries; i++) {
>  		int index = masks_and_count[i].index;
>  
> -		if (ovsl_dereference(ma->masks[index]))
> +		if (ovs_tbl_dereference(ma->masks[index], table))
>  			new->masks[new->count++] = ma->masks[index];
>  	}
>  
> diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
> index f524dc3e4862..cffd412c9045 100644
> --- a/net/openvswitch/flow_table.h
> +++ b/net/openvswitch/flow_table.h
> @@ -59,7 +59,29 @@ struct table_instance {
>  	u32 hash_seed;
>  };
>  
> +/* Locking:
> + *
> + * flow_table is _not_ protected by ovs_lock (see comment above ovs_mutex
> + * in datapath.c).
> + *
> + * All writes to flow_table are protected by the embedded "lock".
> + * In order to ensure datapath destruction does not trigger the destruction
> + * of the flow_table, "refcnt" is used. Therefore, writers must:
> + * 1 - Enter rcu read-protected section
> + * 2 - Increase "table->refcnt"
> + * 3 - Leave rcu read-protected section (to avoid using mutexes inside rcu)
> + * 4 - Lock "table->lock"
> + * 5 - Perform modifications
> + * 6 - Release "table->lock"
> + * 7 - Decrease "table->refcnt"
> + *
> + * Reads are protected by RCU.
> + */
>  struct flow_table {
> +	/* Locks flow table writes. */
> +	struct mutex lock;
> +	refcount_t refcnt;
> +	struct rcu_head rcu;
>  	struct table_instance __rcu *ti;
>  	struct table_instance __rcu *ufid_ti;
>  	struct mask_cache __rcu *mask_cache;
> @@ -71,15 +93,40 @@ struct flow_table {
>  
>  extern struct kmem_cache *flow_stats_cache;
>  
> +#ifdef CONFIG_LOCKDEP
> +int lockdep_ovs_tbl_is_held(const struct flow_table *table);
> +#else
> +static inline int lockdep_ovs_tbl_is_held(const struct flow_table *table)
> +{
> +	(void)table;
> +	return 1;
> +}
> +#endif
> +
> +#define ASSERT_OVS_TBL(tbl)   WARN_ON(!lockdep_ovs_tbl_is_held(tbl))
> +
> +/* Lock-protected update-allowed dereferences.*/
> +#define ovs_tbl_dereference(p, tbl)	\
> +	rcu_dereference_protected(p, lockdep_ovs_tbl_is_held(tbl))
> +
> +/* Read dereferences can be protected by either RCU, table lock or ovs_mutex. */
> +#define rcu_dereference_ovs_tbl(p, tbl) \
> +	rcu_dereference_check(p,		\
> +		lockdep_ovs_tbl_is_held(tbl) || lockdep_ovsl_is_held())
> +
>  int ovs_flow_init(void);
>  void ovs_flow_exit(void);
>  
>  struct sw_flow *ovs_flow_alloc(void);
>  void ovs_flow_free(struct sw_flow *, bool deferred);
>  
> -int ovs_flow_tbl_init(struct flow_table *);
> +struct flow_table *ovs_flow_tbl_alloc(void);
> +void ovs_flow_tbl_put(struct flow_table *table);
> +static inline bool ovs_flow_tbl_get(struct flow_table *table)
> +{
> +	return refcount_inc_not_zero(&table->refcnt);
> +}
>  int ovs_flow_tbl_count(const struct flow_table *table);
> -void ovs_flow_tbl_destroy(struct flow_table *table);
>  int ovs_flow_tbl_flush(struct flow_table *flow_table);
>  
>  int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,


^ permalink raw reply

* Re: [PATCH iwl-net 6/10] ice: check PHY autoneg capability before rejecting ethtool autoneg setting
From: Tony Nguyen @ 2026-04-10 18:58 UTC (permalink / raw)
  To: Aleksandr Loktionov, intel-wired-lan; +Cc: netdev, Jan Glaza
In-Reply-To: <20260403054029.3789616-7-aleksandr.loktionov@intel.com>



On 4/2/2026 10:40 PM, Aleksandr Loktionov wrote:
> ice_set_link_ksettings() rejects autoneg requests by comparing
> user settings against safe_ks which is populated by
> ice_phy_type_to_ethtool(). The Autoneg bit in safe_ks is set
> only if the current PHY configuration reports it supported,
> but this misses PHYs that support autoneg and have it available
> through PHY capabilities. Pull the autoneg flag from the actual
> PHY capabilities (already fetched earlier in the function) to
> ensure the user can toggle autoneg on any capable PHY.
> 
> Fixes: 5cd349c349d6 ("ice: report supported and advertised autoneg using PHY capabilities")
> Cc: stable@vger.kernel.org
> Signed-off-by: Jan Glaza <jan.glaza@intel.com>
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/ice_ethtool.c | 8 ++++++++
>   1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> index 49b9376..44483bc 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> @@ -2654,6 +2654,14 @@ ice_set_link_ksettings(struct net_device *netdev,
>   	/* Get link modes supported by hardware.*/
>   	ice_phy_type_to_ethtool(netdev, &safe_ks);
>   
> +	/* Pull the value of autoneg from phy caps to ensure we allow
> +	 * toggling it on all PHYs that support it.
> +	 */
> +	if (ice_is_phy_caps_an_enabled(phy_caps)) {
> +		ethtool_link_ksettings_add_link_mode(&safe_ks, supported, Autoneg);
> +		set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, safe_ks.link_modes.supported);

 From Sashiko:

This isn't a bug, but should this use 
ethtool_link_ksettings_add_link_mode() instead of calling set_bit() 
directly? Using set_bit() on the link modes breaks the ethtool interface 
abstraction.

Also, does this incorrectly couple the ETHTOOL_LINK_MODE_FEC_NONE_BIT 
support with Autonegotiation support? Forward Error Correction support 
is independent of Autonegotiation.

For PHYs lacking Autonegotiation, the FEC none bit will not be added to 
safe_ks.link_modes.supported. When a user requests settings via ethtool, 
copy_ks.link_modes.advertising will likely contain the FEC none bit 
since it is unconditionally returned by ice_get_link_ksettings().

> +	}
> +
>   	/* and check against modes requested by user.
>   	 * Return an error if unsupported mode was set.
>   	 */


^ permalink raw reply

* Re: [PATCH iwl-net 10/10] ice: allow setting min_tx_rate to 0 to resolve VF bandwidth oversubscription
From: Tony Nguyen @ 2026-04-10 18:58 UTC (permalink / raw)
  To: Aleksandr Loktionov, intel-wired-lan; +Cc: netdev
In-Reply-To: <20260403054029.3789616-11-aleksandr.loktionov@intel.com>



On 4/2/2026 10:40 PM, Aleksandr Loktionov wrote:
> ice_set_vf_bw() refuses to accept any min_tx_rate value when the
> total guaranteed bandwidth is already oversubscribed, even when the
> requested value is 0. This makes it impossible to recover from an
> oversubscribed state via "ip link set <pf> vf <id> min_tx_rate 0".
> 
> Allow a zero min_tx_rate to bypass the oversubscription check so
> users can always clear the guaranteed rate. Additionally print an
> informational message when the oversubscription guard fires to help
> diagnose why a non-zero request was rejected.
> 
> Fixes: 4ecc8633056b ("ice: Add support for VF rate limiting")
> Cc: stable@vger.kernel.org
> Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/ice_sriov.c | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
> index 7e00e09..6e3bec7 100644
> --- a/drivers/net/ethernet/intel/ice/ice_sriov.c
> +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
> @@ -1507,6 +1507,12 @@ ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate)
>   	all_vfs_min_tx_rate -= vf->min_tx_rate;
>   
>   	if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) {
> +		if (ice_calc_all_vfs_min_tx_rate(vf->pf) > link_speed_mbps) {

ice_calc_all_vfs_min_tx_rate() is already called above (out of this 
patch context), can we save that to an interim var and save this second 
call?

> +			dev_info(ice_pf_to_dev(vf->pf),
> +				 "The sum of min_tx_rate for all VFs is greater than the link speed\n");
> +			dev_info(ice_pf_to_dev(vf->pf),
> +				 "Set min_tx_rate to 0 on VFs to resolve oversubscription\n");

Why not 1 string/call?

Thanks,
Tony

> +		}
>   		dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n",
>   			min_tx_rate, vf->vf_id,
>   			all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps,
> @@ -1556,7 +1562,7 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
>   		goto out_put_vf;
>   	}
>   
> -	if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) {
> +	if (min_tx_rate && ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) {
>   		ret = -EINVAL;
>   		goto out_put_vf;
>   	}


^ permalink raw reply

* [PATCH net 0/2] sctp: fix a vtag verification failure caused by stale INITs
From: Xin Long @ 2026-04-10 18:59 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Florian Westphal, Yi Chen

Similar to Scenario B in commit 8e56b063c865 ("netfilter: handle the
connecting collision properly in nf_conntrack_proto_sctp"):

Scenario B: INIT_ACK is delayed until the peer completes its own handshake

  192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408]
    192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885]
    192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408]
    192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO]
    192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK]
  192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] *

There is another case:

Scenario F: INIT is delayed until the peer completes its own handshake

  192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408]
  (OVS upcall)
    192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885]
    192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408]
    192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO]
    192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK]
  192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408]
  (delayed)
  192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] *

In this case, the delayed INIT (e.g. due to OVS upcall) is recorded by
conntrack, which prevents vtag verification from dropping the unexpected
INIT-ACK in nf_conntrack_sctp_packet():

  vtag = ct->proto.sctp.vtag[!dir];
  if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag)
          goto out_unlock;

This happens because ct->proto.sctp.init[!dir] is set by the delayed INIT,
even though it is stale.

Fix this in two parts:

- In netfilter (1st patch): Do not record INITs whose init_tag matches the
  peer vtag, as they carry no new handshake state.

- In SCTP (2nd patch): Prevent endpoints from responding to such INITs with
  INIT-ACK, ensuring correctness even when middleboxes lack the netfilter
  fix.

A follow-up selftest for this scenario will be posted in a separate patch
by Yi Chen.

Xin Long (2):
  netfilter: skip recording stale or retransmitted INIT
  sctp: discard stale INIT after handshake completion

 net/netfilter/nf_conntrack_proto_sctp.c | 10 +++++++---
 net/sctp/sm_statefuns.c                 |  6 ++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

-- 
2.47.1


^ permalink raw reply

* [PATCH net 1/2] netfilter: skip recording stale or retransmitted INIT
From: Xin Long @ 2026-04-10 18:59 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Florian Westphal, Yi Chen
In-Reply-To: <cover.1775847557.git.lucien.xin@gmail.com>

An INIT whose init_tag matches the peer's vtag does not provide new state
information. It indicates either:

- a stale INIT (after INIT-ACK has already been seen on the same side), or
- a retransmitted INIT (after INIT has already been recorded on the same
  side).

In both cases, the INIT must not update ct->proto.sctp.init[] state, since
it does not advance the handshake tracking and may otherwise corrupt
INIT/INIT-ACK validation logic.

Allow INIT processing only when the conntrack entry is newly created
(SCTP_CONNTRACK_NONE), or when the init_tag differs from the stored peer
vtag.

Note that the check is skipped for a ct whose old_state is
SCTP_CONNTRACK_NONE in nf_conntrack_sctp_packet(), as it has just been
created in sctp_new(), which sets
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag.

Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 645d2c43ebf7..7e10fa65cbdd 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -466,9 +466,13 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
 			if (!ih)
 				goto out_unlock;
 
-			if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
-				ct->proto.sctp.init[!dir] = 0;
-			ct->proto.sctp.init[dir] = 1;
+			/* Do not record INIT matching peer vtag (stale or retransmitted INIT). */
+			if (old_state == SCTP_CONNTRACK_NONE ||
+			    ct->proto.sctp.vtag[!dir] != ih->init_tag) {
+				if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
+					ct->proto.sctp.init[!dir] = 0;
+				ct->proto.sctp.init[dir] = 1;
+			}
 
 			pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
 			ct->proto.sctp.vtag[!dir] = ih->init_tag;
-- 
2.47.1


^ permalink raw reply related

* [PATCH net 2/2] sctp: discard stale INIT after handshake completion
From: Xin Long @ 2026-04-10 18:59 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Florian Westphal, Yi Chen
In-Reply-To: <cover.1775847557.git.lucien.xin@gmail.com>

After an association reaches ESTABLISHED, the peer’s init_tag is already
known from the handshake. Any subsequent INIT with the same init_tag is
not a valid restart, but a delayed or duplicate INIT.

Drop such INIT chunks in sctp_sf_do_unexpected_init() instead of
processing them as new association attempts.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/sctp/sm_statefuns.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7b823d759141..3bec026ecbc0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1556,6 +1556,12 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
 	/* Tag the variable length parameters.  */
 	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
+	if (asoc->state >= SCTP_STATE_ESTABLISHED) {
+		/* Discard INIT matching peer vtag after handshake completion (stale INIT). */
+		if (chunk->subh.init_hdr->init_tag == asoc->peer.i.init_tag)
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+	}
+
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
 	if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-- 
2.47.1


^ permalink raw reply related

* Re: [PATCH net-next 1/3] psp: add crypt-offset and spi-threshold get/set attributes
From: Akhilesh Samineni @ 2026-04-10 19:34 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, willemb,
	daniel.zahka, netdev, linux-kernel, jayakrishnan.udayavarma,
	ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <willemdebruijn.kernel.1d7f9f774aa55@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 10028 bytes --]

On Wed, Apr 8, 2026 at 3:07 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Akhilesh Samineni wrote:
> > crypt-offset (Crypt Offset)
> > ----------------------------------
> > The crypt-offset attribute specifies the byte offset within a packet
> > from which encryption begins. This is a per-device attribute that
> > allows a portion of the packet header to remain in plaintext while
> > the rest of the payload is encrypted. This is useful in scenarios
> > where intermediate nodes need to inspect or process a fixed-size
> > header before the encrypted payload.
> >
> > The default value is 0, meaning encryption starts from the beginning
> > of the payload following the PSP header.
> >
> > spi-threshold (SPI Threshold)
> > ------------------------------
> > The SPI (Security Parameter Index) is a 32-bit per-device identifier
> > used to distinguish security associations. As SPI values are allocated
> > monotonically, a threshold is needed to trigger timely SPI rotation
> > before the space is exhausted.
> >
> > The spi-threshold attribute allows userspace to configure the value at
> > which an SPI rotation should be initiated. The default is set to
> > PSP_SPI_THRESHOLD_DEFAULT (~90% of 0x7FFFFFFF), providing a comfortable
> > margin to perform rotation without racing to exhaustion.
> >
> > NOTE: A follow-up series will add notification support to alert
> > subscribed users when the configured spi-threshold is reached, enabling
> > timely SPI rotation.
> >
> > Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
> > Reviewed-by: Kiran Kella <kiran.kella@broadcom.com>
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > ---
> >  Documentation/netlink/specs/psp.yaml | 13 +++++++++++++
> >  include/net/psp/types.h              |  7 +++++++
> >  include/uapi/linux/psp.h             |  2 ++
> >  net/psp/psp-nl-gen.c                 |  6 ++++--
> >  net/psp/psp_main.c                   |  3 +++
> >  net/psp/psp_nl.c                     | 27 +++++++++++++++++++++++----
> >  6 files changed, 52 insertions(+), 6 deletions(-)
> >
> > diff --git a/Documentation/netlink/specs/psp.yaml b/Documentation/netlink/specs/psp.yaml
> > index f3a57782d2cf..b22869be91cf 100644
> > --- a/Documentation/netlink/specs/psp.yaml
> > +++ b/Documentation/netlink/specs/psp.yaml
> > @@ -38,6 +38,15 @@ attribute-sets:
> >          type: u32
> >          enum: version
> >          enum-as-flags: true
> > +      -
> > +        name: crypt-offset
> > +        doc: The offset from the end of the PSP header to the start of the encrypted payload.
>
> In 4 octet units?
>

Yes. crypt-offset is in 4 octet units only. I will update the
description accordingly in the next v2 patch.

> > +        type: u8
> > +      -
> > +        name: spi-threshold
> > +        doc: Threshold for the SPI to trigger notification to the user for appropriate rotate action.
> > +        type: u32
> > +
> >    -
> >      name: assoc
> >      attributes:
> > @@ -170,6 +179,8 @@ operations:
> >              - ifindex
> >              - psp-versions-cap
> >              - psp-versions-ena
> > +            - crypt-offset
> > +            - spi-threshold
> >          pre: psp-device-get-locked
> >          post: psp-device-unlock
> >        dump:
> > @@ -193,6 +204,8 @@ operations:
> >            attributes:
> >              - id
> >              - psp-versions-ena
> > +            - crypt-offset
> > +            - spi-threshold
> >          reply:
> >            attributes: []
> >          pre: psp-device-get-locked
> > diff --git a/include/net/psp/types.h b/include/net/psp/types.h
> > index 25a9096d4e7d..875f7822557f 100644
> > --- a/include/net/psp/types.h
> > +++ b/include/net/psp/types.h
> > @@ -25,6 +25,9 @@ struct psphdr {
> >  #define PSP_SPI_KEY_ID               GENMASK(30, 0)
> >  #define PSP_SPI_KEY_PHASE    BIT(31)
> >
> > +/* Default SPI threshold: ~90% of max SPI (0x7FFFFFFF) to allow rotation before exhaustion */
> > +#define PSP_SPI_THRESHOLD_DEFAULT    0x73333333
>
> Do you want to choose a more round number, in either hex or dec?
>

I think we can use 0x70000000; it's approximately 87.5% of the maximum SPI.

> > +
> >  #define PSPHDR_CRYPT_OFFSET  GENMASK(5, 0)
> >
> >  #define PSPHDR_VERFL_SAMPLE  BIT(7)
> > @@ -38,9 +41,13 @@ struct psphdr {
> >  /**
> >   * struct psp_dev_config - PSP device configuration
> >   * @versions: PSP versions enabled on the device
> > + * @crypt_offset: crypto offset configured on the device
> > + * @spi_threshold: SPI threshold value on the device
> >   */
> >  struct psp_dev_config {
> >       u32 versions;
> > +     u8 crypt_offset;
> > +     u32 spi_threshold;
> >  };
> >
> >  /**
> > diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h
> > index a3a336488dc3..bb390159dc72 100644
> > --- a/include/uapi/linux/psp.h
> > +++ b/include/uapi/linux/psp.h
> > @@ -22,6 +22,8 @@ enum {
> >       PSP_A_DEV_IFINDEX,
> >       PSP_A_DEV_PSP_VERSIONS_CAP,
> >       PSP_A_DEV_PSP_VERSIONS_ENA,
> > +     PSP_A_DEV_CRYPT_OFFSET,
> > +     PSP_A_DEV_SPI_THRESHOLD,
> >
> >       __PSP_A_DEV_MAX,
> >       PSP_A_DEV_MAX = (__PSP_A_DEV_MAX - 1)
> > diff --git a/net/psp/psp-nl-gen.c b/net/psp/psp-nl-gen.c
> > index 22a48d0fa378..e50b8b80955c 100644
> > --- a/net/psp/psp-nl-gen.c
> > +++ b/net/psp/psp-nl-gen.c
> > @@ -23,9 +23,11 @@ static const struct nla_policy psp_dev_get_nl_policy[PSP_A_DEV_ID + 1] = {
> >  };
> >
> >  /* PSP_CMD_DEV_SET - do */
> > -static const struct nla_policy psp_dev_set_nl_policy[PSP_A_DEV_PSP_VERSIONS_ENA + 1] = {
> > +static const struct nla_policy psp_dev_set_nl_policy[PSP_A_DEV_SPI_THRESHOLD + 1] = {
> >       [PSP_A_DEV_ID] = NLA_POLICY_MIN(NLA_U32, 1),
> >       [PSP_A_DEV_PSP_VERSIONS_ENA] = NLA_POLICY_MASK(NLA_U32, 0xf),
> > +     [PSP_A_DEV_CRYPT_OFFSET] = { .type = NLA_U8, },
> > +     [PSP_A_DEV_SPI_THRESHOLD] = { .type = NLA_U32, },
> >  };
> >
> >  /* PSP_CMD_KEY_ROTATE - do */
> > @@ -75,7 +77,7 @@ static const struct genl_split_ops psp_nl_ops[] = {
> >               .doit           = psp_nl_dev_set_doit,
> >               .post_doit      = psp_device_unlock,
> >               .policy         = psp_dev_set_nl_policy,
> > -             .maxattr        = PSP_A_DEV_PSP_VERSIONS_ENA,
> > +             .maxattr        = PSP_A_DEV_SPI_THRESHOLD,
> >               .flags          = GENL_CMD_CAP_DO,
> >       },
> >       {
> > diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c
> > index 9508b6c38003..536ee44db09d 100644
> > --- a/net/psp/psp_main.c
> > +++ b/net/psp/psp_main.c
> > @@ -79,6 +79,9 @@ psp_dev_create(struct net_device *netdev,
> >       INIT_LIST_HEAD(&psd->stale_assocs);
> >       refcount_set(&psd->refcnt, 1);
> >
> > +     /* ~90% of 0x7FFFFFFF; allows SPI rotation well before space is exhausted */
>
> Repeat comment. Not needed here.
>

Ack

> > +     psd->config.spi_threshold = PSP_SPI_THRESHOLD_DEFAULT;
> > +
> >       mutex_lock(&psp_devs_lock);
> >       err = xa_alloc_cyclic(&psp_devs, &psd->id, psd, xa_limit_16b,
> >                             &last_id, GFP_KERNEL);
> > diff --git a/net/psp/psp_nl.c b/net/psp/psp_nl.c
> > index 6afd7707ec12..fbb77460a24b 100644
> > --- a/net/psp/psp_nl.c
> > +++ b/net/psp/psp_nl.c
> > @@ -101,7 +101,9 @@ psp_nl_dev_fill(struct psp_dev *psd, struct sk_buff *rsp,
> >       if (nla_put_u32(rsp, PSP_A_DEV_ID, psd->id) ||
> >           nla_put_u32(rsp, PSP_A_DEV_IFINDEX, psd->main_netdev->ifindex) ||
> >           nla_put_u32(rsp, PSP_A_DEV_PSP_VERSIONS_CAP, psd->caps->versions) ||
> > -         nla_put_u32(rsp, PSP_A_DEV_PSP_VERSIONS_ENA, psd->config.versions))
> > +         nla_put_u32(rsp, PSP_A_DEV_PSP_VERSIONS_ENA, psd->config.versions) ||
> > +         nla_put_u8(rsp, PSP_A_DEV_CRYPT_OFFSET, psd->config.crypt_offset) ||
> > +         nla_put_u32(rsp, PSP_A_DEV_SPI_THRESHOLD, psd->config.spi_threshold))
> >               goto err_cancel_msg;
> >
> >       genlmsg_end(rsp, hdr);
> > @@ -193,6 +195,13 @@ int psp_nl_dev_set_doit(struct sk_buff *skb, struct genl_info *info)
> >
> >       memcpy(&new_config, &psd->config, sizeof(new_config));
> >
> > +     if (!info->attrs[PSP_A_DEV_PSP_VERSIONS_ENA] &&
> > +         !info->attrs[PSP_A_DEV_CRYPT_OFFSET] &&
> > +         !info->attrs[PSP_A_DEV_SPI_THRESHOLD]) {
> > +             NL_SET_ERR_MSG(info->extack, "No settings present");
> > +             return -EINVAL;
> > +     }
> > +
> >       if (info->attrs[PSP_A_DEV_PSP_VERSIONS_ENA]) {
> >               new_config.versions =
> >                       nla_get_u32(info->attrs[PSP_A_DEV_PSP_VERSIONS_ENA]);
> > @@ -200,9 +209,19 @@ int psp_nl_dev_set_doit(struct sk_buff *skb, struct genl_info *info)
> >                       NL_SET_ERR_MSG(info->extack, "Requested PSP versions not supported by the device");
> >                       return -EINVAL;
> >               }
> > -     } else {
> > -             NL_SET_ERR_MSG(info->extack, "No settings present");
> > -             return -EINVAL;
> > +     }
> > +
> > +     if (info->attrs[PSP_A_DEV_CRYPT_OFFSET])
> > +             new_config.crypt_offset =
> > +                     nla_get_u8(info->attrs[PSP_A_DEV_CRYPT_OFFSET]);
>
> PSP defines a 6-bit field in 4 octet units. Does this need bounds checking?
>

 Yes, I will add the bound checks in the next v2 patch.
> > +
> > +     if (info->attrs[PSP_A_DEV_SPI_THRESHOLD]) {
> > +             new_config.spi_threshold =
> > +                     nla_get_u32(info->attrs[PSP_A_DEV_SPI_THRESHOLD]);
> > +             if (new_config.spi_threshold & PSP_SPI_KEY_PHASE) {
> > +                     NL_SET_ERR_MSG(info->extack, "SPI threshold must not have bit 31 set");
> > +                     return -EINVAL;
> > +             }
> >       }
> >
> >       rsp = psp_nl_reply_new(info);
> > --
> > 2.45.4
> >
>
>

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 1/3] psp: add crypt-offset and spi-threshold get/set attributes
From: Akhilesh Samineni @ 2026-04-10 19:36 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Willem de Bruijn, davem, edumazet, pabeni, andrew+netdev, horms,
	willemb, daniel.zahka, netdev, linux-kernel,
	jayakrishnan.udayavarma, ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <20260407180432.102073cf@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 596 bytes --]

On Wed, Apr 8, 2026 at 6:34 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue, 07 Apr 2026 17:37:41 -0400 Willem de Bruijn wrote:
> > > +   if (info->attrs[PSP_A_DEV_CRYPT_OFFSET])
> > > +           new_config.crypt_offset =
> > > +                   nla_get_u8(info->attrs[PSP_A_DEV_CRYPT_OFFSET]);
> >
> > PSP defines a 6-bit field in 4 octet units. Does this need bounds checking?
>
> More fundamentally, were we to support this -- is it a device property
> or an assoc property?

It's a device property. All associations under the device will share
the same crypt-offset.

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 2/3] netdevsim: psp: handle the new crypt-offset and spi-threshold get/set operations
From: Akhilesh Samineni @ 2026-04-10 19:45 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, willemb,
	daniel.zahka, netdev, linux-kernel, jayakrishnan.udayavarma,
	ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <willemdebruijn.kernel.2484afecaca4d@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2059 bytes --]

On Wed, Apr 8, 2026 at 3:13 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Akhilesh Samineni wrote:
> > Implement the crypt-offset and spi-threshold get/set in netdevsim PSP.
> >
> > Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
> > Reviewed-by: Kiran Kella <kiran.kella@broadcom.com>
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > ---
> >  drivers/net/netdevsim/netdevsim.h | 2 ++
> >  drivers/net/netdevsim/psp.c       | 6 ++++++
> >  2 files changed, 8 insertions(+)
> >
> > diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
> > index c904e14f6b3f..3ad7d42391c0 100644
> > --- a/drivers/net/netdevsim/netdevsim.h
> > +++ b/drivers/net/netdevsim/netdevsim.h
> > @@ -117,6 +117,8 @@ struct netdevsim {
> >               struct psp_dev *dev;
> >               u32 spi;
> >               u32 assoc_cnt;
> > +             u8  crypt_offset;
>
> Minor: variable names are already not aligned. No need for two spaces.
>

Ack
> > +             u32 spi_threshold;
> >       } psp;
> >
> >       struct nsim_bus_dev *nsim_bus_dev;
> > diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c
> > index 0b4d717253b0..9098edf00c5c 100644
> > --- a/drivers/net/netdevsim/psp.c
> > +++ b/drivers/net/netdevsim/psp.c
> > @@ -122,6 +122,11 @@ static int
> >  nsim_psp_set_config(struct psp_dev *psd, struct psp_dev_config *conf,
> >                   struct netlink_ext_ack *extack)
> >  {
> > +     struct netdevsim *ns = psd->drv_priv;
> > +
> > +     ns->psp.crypt_offset = conf->crypt_offset;
> > +     ns->psp.spi_threshold = conf->spi_threshold;
> > +
> >       return 0;
> >  }
> >
> > @@ -249,6 +254,7 @@ int nsim_psp_init(struct netdevsim *ns)
> >       if (err)
> >               return err;
> >
> > +     ns->psp.spi_threshold = PSP_SPI_THRESHOLD_DEFAULT;
> >       debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops);
> >       return 0;
> >  }
> > --
> > 2.45.4
> >
>
>

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 2/3] netdevsim: psp: handle the new crypt-offset and spi-threshold get/set operations
From: Akhilesh Samineni @ 2026-04-10 19:48 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, willemb,
	daniel.zahka, netdev, linux-kernel, jayakrishnan.udayavarma,
	ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <willemdebruijn.kernel.327df0cb46f23@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2096 bytes --]

On Wed, Apr 8, 2026 at 3:19 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Akhilesh Samineni wrote:
> > Implement the crypt-offset and spi-threshold get/set in netdevsim PSP.
> >
> > Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
> > Reviewed-by: Kiran Kella <kiran.kella@broadcom.com>
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > ---
> >  drivers/net/netdevsim/netdevsim.h | 2 ++
> >  drivers/net/netdevsim/psp.c       | 6 ++++++
> >  2 files changed, 8 insertions(+)
> >
> > diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
> > index c904e14f6b3f..3ad7d42391c0 100644
> > --- a/drivers/net/netdevsim/netdevsim.h
> > +++ b/drivers/net/netdevsim/netdevsim.h
> > @@ -117,6 +117,8 @@ struct netdevsim {
> >               struct psp_dev *dev;
> >               u32 spi;
> >               u32 assoc_cnt;
> > +             u8  crypt_offset;
> > +             u32 spi_threshold;
> >       } psp;
> >
> >       struct nsim_bus_dev *nsim_bus_dev;
> > diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c
> > index 0b4d717253b0..9098edf00c5c 100644
> > --- a/drivers/net/netdevsim/psp.c
> > +++ b/drivers/net/netdevsim/psp.c
> > @@ -122,6 +122,11 @@ static int
> >  nsim_psp_set_config(struct psp_dev *psd, struct psp_dev_config *conf,
> >                   struct netlink_ext_ack *extack)
> >  {
> > +     struct netdevsim *ns = psd->drv_priv;
> > +
> > +     ns->psp.crypt_offset = conf->crypt_offset;
> > +     ns->psp.spi_threshold = conf->spi_threshold;
> > +
> >       return 0;
> >  }
> >
> > @@ -249,6 +254,7 @@ int nsim_psp_init(struct netdevsim *ns)
> >       if (err)
> >               return err;
> >
> > +     ns->psp.spi_threshold = PSP_SPI_THRESHOLD_DEFAULT;
> >       debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops);
> >       return 0;
>
> Default initialization should probably all complete before the device
> is made visible with psp_dev_create.

Yes. I will update it in the next v2 patch.

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* [PATCH net-next v2 00/14] net: macb: implement context swapping
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun

MACB has a pretty primitive approach to buffer management: all buffers are
stored in `struct macb *bp`. On operations that require buffer reallocation
(currently set_ringparam & change_mtu), the only option is to close the
interface, change our global state and re-open the interface.

Two issues:
- It doesn't fly on memory pressured systems; we free our precious
  buffers and don't manage to reallocate fully, meaning our machine
  just lost its network access.
- Anecdotally, it is pretty slow because it implies a full PHY reinit.

Instead, we shall:
 - allocate a new context (including buffers) first
 - if it fails, early return without any impact to the interface
 - stop interface
 - update global state (bp, netdev, etc)
 - pass newly allocated buffer pointers to the hardware
 - start interface
 - free old context

This is what we implement here. Both .set_ringparam() and
.ndo_change_mtu() are covered by this series. In the future,
at least .set_channels() [0], XDP [1] and XSK [2] would benefit.

The change is super intrusive so conflicts will be major. Sorry!

Thanks,
Have a nice day,
Théo

[0]: https://lore.kernel.org/netdev/20260317-macb-set-channels-v4-0-1bd4f4ffcfca@bootlin.com/
[1]: https://lore.kernel.org/netdev/20260323221047.2749577-1-pvalerio@redhat.com/
[2]: https://lore.kernel.org/netdev/20260304-macb-xsk-v1-0-ba2ebe2bdaa3@bootlin.com/

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
Changes in v2:
- Patch "add subset of `struct macb` to `struct macb_context`" was
  messed up. It contained much more than what the name implied. Split
  into three commits (I caused trouble by rebase reordering).
- Fix tieoff; V1 allocated it without initialisation.
- Fix NULL pointer dereference on context in macb_get_regs() and
  macb_get_ringparam() when interface is offline.
- Patch "unify device pointer naming convention":
  - Fix build issue when CONFIG_NETCONSOLE=y.
  - Rename `struct net_device *dev` to `netdev` in macb.h.
  - Rename `struct phy_device *phy` to `phydev` in macb_main.c.
- On swap, call netdev_tx_reset_queue() to reset all DQL counters.
- At end of swap, add missing kfree(old_ctx).
- During HW disabling in swap, grab bp->lock to protect against IRQ
  handler.
- On swap, cancel the three BH features MACB has:
  bp->hresp_err_bh_work, bp->tx_lpi_work and queue->tx_error_task.
- On swap, call macb_configure_dma() which writes buffer size to
  hardware registers. This is important because the change_mtu codepath
  changes the buffer size.
- Rebase onto latest net-next/main (58dd34dbd5b0) & resolve conflicts.
- Link to v1: https://patch.msgid.link/20260401-macb-context-v1-0-9590c5ab7272@bootlin.com

---
Théo Lebrun (14):
      net: macb: unify device pointer naming convention
      net: macb: unify `struct macb *` naming convention
      net: macb: unify queue index variable naming convention and types
      net: macb: enforce reverse christmas tree (RCT) convention
      net: macb: allocate tieoff descriptor once across device lifetime
      net: macb: introduce macb_context struct for buffer management
      net: macb: avoid macb_init_rx_buffer_size() modifying state
      net: macb: make `struct macb` subset reachable from macb_context struct
      net: macb: change caps helpers signatures
      net: macb: change function signatures to take contexts
      net: macb: introduce macb_context_alloc() helper
      net: macb: re-read ISR inside IRQ handler locked section
      net: macb: use context swapping in .set_ringparam()
      net: macb: use context swapping in .ndo_change_mtu()

 drivers/net/ethernet/cadence/macb.h      |  125 ++-
 drivers/net/ethernet/cadence/macb_main.c | 1767 +++++++++++++++++-------------
 drivers/net/ethernet/cadence/macb_pci.c  |   46 +-
 drivers/net/ethernet/cadence/macb_ptp.c  |   26 +-
 4 files changed, 1126 insertions(+), 838 deletions(-)
---
base-commit: 6b6916526425235d5875df21dfa6f31fdc098599
change-id: 20260401-macb-context-bd0caf20414d

Best regards,
--  
Théo Lebrun <theo.lebrun@bootlin.com>


^ permalink raw reply

* [PATCH net-next v2 01/14] net: macb: unify device pointer naming convention
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Here are all device pointer variable permutations inside MACB:

   struct device *dev;
   struct net_device *dev;
   struct net_device *ndev;
   struct net_device *netdev;
   struct pci_dev *pdev;              // inside macb_pci.c
   struct platform_device *pdev;
   struct platform_device *plat_dev;  // inside macb_pci.c

Unify to this convention:

   struct device *dev;
   struct net_device *netdev;
   struct pci_dev *pci;
   struct platform_device *pdev;

Ensure nothing slipped through using ctags tooling:

⟩ ctags -o - --kinds-c='{local}{member}{parameter}' \
    --fields='{typeref}' drivers/net/ethernet/cadence/* | \
  awk -F"\t" '
    $NF~/struct:.*(device|dev) / {print $NF, $1}' | \
  sort -u
typeref:struct:device * dev
typeref:struct:in_device * idev        // ignored
typeref:struct:net_device * netdev
typeref:struct:pci_dev * pci
typeref:struct:phy_device * phy        // ignored
typeref:struct:phy_device * phydev     // ignored
typeref:struct:platform_device * pdev

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |  20 +-
 drivers/net/ethernet/cadence/macb_main.c | 632 ++++++++++++++++---------------
 drivers/net/ethernet/cadence/macb_pci.c  |  46 +--
 drivers/net/ethernet/cadence/macb_ptp.c  |  18 +-
 4 files changed, 359 insertions(+), 357 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 2de56017ee0d..9857df5b57f0 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1207,11 +1207,11 @@ struct macb_or_gem_ops {
 
 /* MACB-PTP interface: adapt to platform needs. */
 struct macb_ptp_info {
-	void (*ptp_init)(struct net_device *ndev);
-	void (*ptp_remove)(struct net_device *ndev);
+	void (*ptp_init)(struct net_device *netdev);
+	void (*ptp_remove)(struct net_device *netdev);
 	s32 (*get_ptp_max_adj)(void);
 	unsigned int (*get_tsu_rate)(struct macb *bp);
-	int (*get_ts_info)(struct net_device *dev,
+	int (*get_ts_info)(struct net_device *netdev,
 			   struct kernel_ethtool_ts_info *info);
 	int (*get_hwtst)(struct net_device *netdev,
 			 struct kernel_hwtstamp_config *tstamp_config);
@@ -1326,7 +1326,7 @@ struct macb {
 	struct clk		*tx_clk;
 	struct clk		*rx_clk;
 	struct clk		*tsu_clk;
-	struct net_device	*dev;
+	struct net_device	*netdev;
 	/* Protects hw_stats and ethtool_stats */
 	spinlock_t		stats_lock;
 	union {
@@ -1406,8 +1406,8 @@ enum macb_bd_control {
 	TSTAMP_ALL_FRAMES,
 };
 
-void gem_ptp_init(struct net_device *ndev);
-void gem_ptp_remove(struct net_device *ndev);
+void gem_ptp_init(struct net_device *netdev);
+void gem_ptp_remove(struct net_device *netdev);
 void gem_ptp_txstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc);
 void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc);
 static inline void gem_ptp_do_txstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc)
@@ -1426,14 +1426,14 @@ static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, stru
 	gem_ptp_rxstamp(bp, skb, desc);
 }
 
-int gem_get_hwtst(struct net_device *dev,
+int gem_get_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config);
-int gem_set_hwtst(struct net_device *dev,
+int gem_set_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config,
 		  struct netlink_ext_ack *extack);
 #else
-static inline void gem_ptp_init(struct net_device *ndev) { }
-static inline void gem_ptp_remove(struct net_device *ndev) { }
+static inline void gem_ptp_init(struct net_device *netdev) { }
+static inline void gem_ptp_remove(struct net_device *netdev) { }
 
 static inline void gem_ptp_do_txstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
 static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index d9716c56f705..896d481e0f95 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -252,9 +252,9 @@ static void macb_set_hwaddr(struct macb *bp)
 	u32 bottom;
 	u16 top;
 
-	bottom = get_unaligned_le32(bp->dev->dev_addr);
+	bottom = get_unaligned_le32(bp->netdev->dev_addr);
 	macb_or_gem_writel(bp, SA1B, bottom);
-	top = get_unaligned_le16(bp->dev->dev_addr + 4);
+	top = get_unaligned_le16(bp->netdev->dev_addr + 4);
 	macb_or_gem_writel(bp, SA1T, top);
 
 	if (gem_has_ptp(bp)) {
@@ -291,13 +291,13 @@ static void macb_get_hwaddr(struct macb *bp)
 		addr[5] = (top >> 8) & 0xff;
 
 		if (is_valid_ether_addr(addr)) {
-			eth_hw_addr_set(bp->dev, addr);
+			eth_hw_addr_set(bp->netdev, addr);
 			return;
 		}
 	}
 
 	dev_info(&bp->pdev->dev, "invalid hw address, using random\n");
-	eth_hw_addr_random(bp->dev);
+	eth_hw_addr_random(bp->netdev);
 }
 
 static int macb_mdio_wait_for_idle(struct macb *bp)
@@ -509,12 +509,12 @@ static void macb_set_tx_clk(struct macb *bp, int speed)
 	ferr = abs(rate_rounded - rate);
 	ferr = DIV_ROUND_UP(ferr, rate / 100000);
 	if (ferr > 5)
-		netdev_warn(bp->dev,
+		netdev_warn(bp->netdev,
 			    "unable to generate target frequency: %ld Hz\n",
 			    rate);
 
 	if (clk_set_rate(bp->tx_clk, rate_rounded))
-		netdev_err(bp->dev, "adjusting tx_clk failed.\n");
+		netdev_err(bp->netdev, "adjusting tx_clk failed.\n");
 }
 
 static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
@@ -697,8 +697,8 @@ static void macb_tx_lpi_wake(struct macb *bp)
 
 static void macb_mac_disable_tx_lpi(struct phylink_config *config)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 
 	cancel_delayed_work_sync(&bp->tx_lpi_work);
@@ -712,8 +712,8 @@ static void macb_mac_disable_tx_lpi(struct phylink_config *config)
 static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
 				  bool tx_clk_stop)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 
 	spin_lock_irqsave(&bp->lock, flags);
@@ -732,8 +732,8 @@ static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
 static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 			    const struct phylink_link_state *state)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 	u32 old_ctrl, ctrl;
 	u32 old_ncr, ncr;
@@ -774,8 +774,8 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
 			       phy_interface_t interface)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int q;
 	u32 ctrl;
@@ -789,7 +789,7 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
 	ctrl = macb_readl(bp, NCR) & ~(MACB_BIT(RE) | MACB_BIT(TE));
 	macb_writel(bp, NCR, ctrl);
 
-	netif_tx_stop_all_queues(ndev);
+	netif_tx_stop_all_queues(netdev);
 }
 
 /* Use juggling algorithm to left rotate tx ring and tx skb array */
@@ -884,13 +884,13 @@ static void gem_shuffle_tx_rings(struct macb *bp)
 }
 
 static void macb_mac_link_up(struct phylink_config *config,
-			     struct phy_device *phy,
+			     struct phy_device *phydev,
 			     unsigned int mode, phy_interface_t interface,
 			     int speed, int duplex,
 			     bool tx_pause, bool rx_pause)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
@@ -946,14 +946,14 @@ static void macb_mac_link_up(struct phylink_config *config,
 
 	macb_writel(bp, NCR, ctrl | MACB_BIT(RE) | MACB_BIT(TE));
 
-	netif_tx_wake_all_queues(ndev);
+	netif_tx_wake_all_queues(netdev);
 }
 
 static struct phylink_pcs *macb_mac_select_pcs(struct phylink_config *config,
 					       phy_interface_t interface)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	if (interface == PHY_INTERFACE_MODE_10GBASER)
 		return &bp->phylink_usx_pcs;
@@ -982,7 +982,7 @@ static bool macb_phy_handle_exists(struct device_node *dn)
 static int macb_phylink_connect(struct macb *bp)
 {
 	struct device_node *dn = bp->pdev->dev.of_node;
-	struct net_device *dev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	struct phy_device *phydev;
 	int ret;
 
@@ -992,7 +992,7 @@ static int macb_phylink_connect(struct macb *bp)
 	if (!dn || (ret && !macb_phy_handle_exists(dn))) {
 		phydev = phy_find_first(bp->mii_bus);
 		if (!phydev) {
-			netdev_err(dev, "no PHY found\n");
+			netdev_err(netdev, "no PHY found\n");
 			return -ENXIO;
 		}
 
@@ -1001,7 +1001,7 @@ static int macb_phylink_connect(struct macb *bp)
 	}
 
 	if (ret) {
-		netdev_err(dev, "Could not attach PHY (%d)\n", ret);
+		netdev_err(netdev, "Could not attach PHY (%d)\n", ret);
 		return ret;
 	}
 
@@ -1013,21 +1013,21 @@ static int macb_phylink_connect(struct macb *bp)
 static void macb_get_pcs_fixed_state(struct phylink_config *config,
 				     struct phylink_link_state *state)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	state->link = (macb_readl(bp, NSR) & MACB_BIT(NSR_LINK)) != 0;
 }
 
 /* based on au1000_eth. c*/
-static int macb_mii_probe(struct net_device *dev)
+static int macb_mii_probe(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	bp->phylink_sgmii_pcs.ops = &macb_phylink_pcs_ops;
 	bp->phylink_usx_pcs.ops = &macb_phylink_usx_pcs_ops;
 
-	bp->phylink_config.dev = &dev->dev;
+	bp->phylink_config.dev = &netdev->dev;
 	bp->phylink_config.type = PHYLINK_NETDEV;
 	bp->phylink_config.mac_managed_pm = true;
 
@@ -1086,7 +1086,7 @@ static int macb_mii_probe(struct net_device *dev)
 	bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode,
 				     bp->phy_interface, &macb_phylink_ops);
 	if (IS_ERR(bp->phylink)) {
-		netdev_err(dev, "Could not create a phylink instance (%ld)\n",
+		netdev_err(netdev, "Could not create a phylink instance (%ld)\n",
 			   PTR_ERR(bp->phylink));
 		return PTR_ERR(bp->phylink);
 	}
@@ -1133,7 +1133,7 @@ static int macb_mii_init(struct macb *bp)
 	 */
 	mdio_np = of_get_child_by_name(np, "mdio");
 	if (!mdio_np && of_phy_is_fixed_link(np))
-		return macb_mii_probe(bp->dev);
+		return macb_mii_probe(bp->netdev);
 
 	/* Enable management port */
 	macb_writel(bp, NCR, MACB_BIT(MPE));
@@ -1154,13 +1154,13 @@ static int macb_mii_init(struct macb *bp)
 	bp->mii_bus->priv = bp;
 	bp->mii_bus->parent = &bp->pdev->dev;
 
-	dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
+	dev_set_drvdata(&bp->netdev->dev, bp->mii_bus);
 
 	err = macb_mdiobus_register(bp, mdio_np);
 	if (err)
 		goto err_out_free_mdiobus;
 
-	err = macb_mii_probe(bp->dev);
+	err = macb_mii_probe(bp->netdev);
 	if (err)
 		goto err_out_unregister_bus;
 
@@ -1268,7 +1268,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	unsigned long		flags;
 
 	queue_index = queue - bp->queues;
-	netdev_vdbg(bp->dev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
+	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
 		    queue_index, queue->tx_tail, queue->tx_head);
 
 	/* Prevent the queue NAPI TX poll from running, as it calls
@@ -1281,14 +1281,14 @@ static void macb_tx_error_task(struct work_struct *work)
 	spin_lock_irqsave(&bp->lock, flags);
 
 	/* Make sure nobody is trying to queue up new packets */
-	netif_tx_stop_all_queues(bp->dev);
+	netif_tx_stop_all_queues(bp->netdev);
 
 	/* Stop transmission now
 	 * (in case we have just queued new packets)
 	 * macb/gem must be halted to write TBQP register
 	 */
 	if (macb_halt_tx(bp)) {
-		netdev_err(bp->dev, "BUG: halt tx timed out\n");
+		netdev_err(bp->netdev, "BUG: halt tx timed out\n");
 		macb_writel(bp, NCR, macb_readl(bp, NCR) & (~MACB_BIT(TE)));
 		halt_timeout = true;
 	}
@@ -1317,13 +1317,13 @@ static void macb_tx_error_task(struct work_struct *work)
 			 * since it's the only one written back by the hardware
 			 */
 			if (!(ctrl & MACB_BIT(TX_BUF_EXHAUSTED))) {
-				netdev_vdbg(bp->dev, "txerr skb %u (data %p) TX complete\n",
+				netdev_vdbg(bp->netdev, "txerr skb %u (data %p) TX complete\n",
 					    macb_tx_ring_wrap(bp, tail),
 					    skb->data);
-				bp->dev->stats.tx_packets++;
+				bp->netdev->stats.tx_packets++;
 				queue->stats.tx_packets++;
 				packets++;
-				bp->dev->stats.tx_bytes += skb->len;
+				bp->netdev->stats.tx_bytes += skb->len;
 				queue->stats.tx_bytes += skb->len;
 				bytes += skb->len;
 			}
@@ -1333,7 +1333,7 @@ static void macb_tx_error_task(struct work_struct *work)
 			 * those. Statistics are updated by hardware.
 			 */
 			if (ctrl & MACB_BIT(TX_BUF_EXHAUSTED))
-				netdev_err(bp->dev,
+				netdev_err(bp->netdev,
 					   "BUG: TX buffers exhausted mid-frame\n");
 
 			desc->ctrl = ctrl | MACB_BIT(TX_USED);
@@ -1342,7 +1342,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		macb_tx_unmap(bp, tx_skb, 0);
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->dev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
 				  packets, bytes);
 
 	/* Set end of TX queue */
@@ -1367,7 +1367,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TE));
 
 	/* Now we are ready to start transmission again */
-	netif_tx_start_all_queues(bp->dev);
+	netif_tx_start_all_queues(bp->netdev);
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 
 	spin_unlock_irqrestore(&bp->lock, flags);
@@ -1446,12 +1446,12 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 				    !ptp_one_step_sync(skb))
 					gem_ptp_do_txstamp(bp, skb, desc);
 
-				netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n",
+				netdev_vdbg(bp->netdev, "skb %u (data %p) TX complete\n",
 					    macb_tx_ring_wrap(bp, tail),
 					    skb->data);
-				bp->dev->stats.tx_packets++;
+				bp->netdev->stats.tx_packets++;
 				queue->stats.tx_packets++;
-				bp->dev->stats.tx_bytes += skb->len;
+				bp->netdev->stats.tx_bytes += skb->len;
 				queue->stats.tx_bytes += skb->len;
 				packets++;
 				bytes += skb->len;
@@ -1469,14 +1469,14 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 		}
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->dev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
 				  packets, bytes);
 
 	queue->tx_tail = tail;
-	if (__netif_subqueue_stopped(bp->dev, queue_index) &&
+	if (__netif_subqueue_stopped(bp->netdev, queue_index) &&
 	    CIRC_CNT(queue->tx_head, queue->tx_tail,
 		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
-		netif_wake_subqueue(bp->dev, queue_index);
+		netif_wake_subqueue(bp->netdev, queue_index);
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
 
 	if (packets)
@@ -1504,9 +1504,9 @@ static void gem_rx_refill(struct macb_queue *queue)
 
 		if (!queue->rx_skbuff[entry]) {
 			/* allocate sk_buff for this free entry in ring */
-			skb = netdev_alloc_skb(bp->dev, bp->rx_buffer_size);
+			skb = netdev_alloc_skb(bp->netdev, bp->rx_buffer_size);
 			if (unlikely(!skb)) {
-				netdev_err(bp->dev,
+				netdev_err(bp->netdev,
 					   "Unable to allocate sk_buff\n");
 				break;
 			}
@@ -1555,8 +1555,8 @@ static void gem_rx_refill(struct macb_queue *queue)
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
-	netdev_vdbg(bp->dev, "rx ring: queue: %p, prepared head %d, tail %d\n",
-			queue, queue->rx_prepared_head, queue->rx_tail);
+	netdev_vdbg(bp->netdev, "rx ring: queue: %p, prepared head %d, tail %d\n",
+		    queue, queue->rx_prepared_head, queue->rx_tail);
 }
 
 /* Mark DMA descriptors from begin up to and not including end as unused */
@@ -1616,17 +1616,17 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		count++;
 
 		if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) {
-			netdev_err(bp->dev,
+			netdev_err(bp->netdev,
 				   "not whole frame pointed by descriptor\n");
-			bp->dev->stats.rx_dropped++;
+			bp->netdev->stats.rx_dropped++;
 			queue->stats.rx_dropped++;
 			break;
 		}
 		skb = queue->rx_skbuff[entry];
 		if (unlikely(!skb)) {
-			netdev_err(bp->dev,
+			netdev_err(bp->netdev,
 				   "inconsistent Rx descriptor chain\n");
-			bp->dev->stats.rx_dropped++;
+			bp->netdev->stats.rx_dropped++;
 			queue->stats.rx_dropped++;
 			break;
 		}
@@ -1634,28 +1634,28 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		queue->rx_skbuff[entry] = NULL;
 		len = ctrl & bp->rx_frm_len_mask;
 
-		netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len);
+		netdev_vdbg(bp->netdev, "gem_rx %u (len %u)\n", entry, len);
 
 		skb_put(skb, len);
 		dma_unmap_single(&bp->pdev->dev, addr,
 				 bp->rx_buffer_size, DMA_FROM_DEVICE);
 
-		skb->protocol = eth_type_trans(skb, bp->dev);
+		skb->protocol = eth_type_trans(skb, bp->netdev);
 		skb_checksum_none_assert(skb);
-		if (bp->dev->features & NETIF_F_RXCSUM &&
-		    !(bp->dev->flags & IFF_PROMISC) &&
+		if (bp->netdev->features & NETIF_F_RXCSUM &&
+		    !(bp->netdev->flags & IFF_PROMISC) &&
 		    GEM_BFEXT(RX_CSUM, ctrl) & GEM_RX_CSUM_CHECKED_MASK)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-		bp->dev->stats.rx_packets++;
+		bp->netdev->stats.rx_packets++;
 		queue->stats.rx_packets++;
-		bp->dev->stats.rx_bytes += skb->len;
+		bp->netdev->stats.rx_bytes += skb->len;
 		queue->stats.rx_bytes += skb->len;
 
 		gem_ptp_do_rxstamp(bp, skb, desc);
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
-		netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
+		netdev_vdbg(bp->netdev, "received skb of length %u, csum: %08x\n",
 			    skb->len, skb->csum);
 		print_hex_dump(KERN_DEBUG, " mac: ", DUMP_PREFIX_ADDRESS, 16, 1,
 			       skb_mac_header(skb), 16, true);
@@ -1684,9 +1684,9 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	desc = macb_rx_desc(queue, last_frag);
 	len = desc->ctrl & bp->rx_frm_len_mask;
 
-	netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n",
-		macb_rx_ring_wrap(bp, first_frag),
-		macb_rx_ring_wrap(bp, last_frag), len);
+	netdev_vdbg(bp->netdev, "macb_rx_frame frags %u - %u (len %u)\n",
+		    macb_rx_ring_wrap(bp, first_frag),
+		    macb_rx_ring_wrap(bp, last_frag), len);
 
 	/* The ethernet header starts NET_IP_ALIGN bytes into the
 	 * first buffer. Since the header is 14 bytes, this makes the
@@ -1696,9 +1696,9 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	 * the two padding bytes into the skb so that we avoid hitting
 	 * the slowpath in memcpy(), and pull them off afterwards.
 	 */
-	skb = netdev_alloc_skb(bp->dev, len + NET_IP_ALIGN);
+	skb = netdev_alloc_skb(bp->netdev, len + NET_IP_ALIGN);
 	if (!skb) {
-		bp->dev->stats.rx_dropped++;
+		bp->netdev->stats.rx_dropped++;
 		for (frag = first_frag; ; frag++) {
 			desc = macb_rx_desc(queue, frag);
 			desc->addr &= ~MACB_BIT(RX_USED);
@@ -1742,11 +1742,11 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	wmb();
 
 	__skb_pull(skb, NET_IP_ALIGN);
-	skb->protocol = eth_type_trans(skb, bp->dev);
+	skb->protocol = eth_type_trans(skb, bp->netdev);
 
-	bp->dev->stats.rx_packets++;
-	bp->dev->stats.rx_bytes += skb->len;
-	netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
+	bp->netdev->stats.rx_packets++;
+	bp->netdev->stats.rx_bytes += skb->len;
+	netdev_vdbg(bp->netdev, "received skb of length %u, csum: %08x\n",
 		    skb->len, skb->csum);
 	napi_gro_receive(napi, skb);
 
@@ -1826,7 +1826,7 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 		unsigned long flags;
 		u32 ctrl;
 
-		netdev_err(bp->dev, "RX queue corruption: reset it\n");
+		netdev_err(bp->netdev, "RX queue corruption: reset it\n");
 
 		spin_lock_irqsave(&bp->lock, flags);
 
@@ -1873,7 +1873,7 @@ static int macb_rx_poll(struct napi_struct *napi, int budget)
 
 	work_done = bp->macbgem_ops.mog_rx(queue, napi, budget);
 
-	netdev_vdbg(bp->dev, "RX poll: queue = %u, work_done = %d, budget = %d\n",
+	netdev_vdbg(bp->netdev, "RX poll: queue = %u, work_done = %d, budget = %d\n",
 		    (unsigned int)(queue - bp->queues), work_done, budget);
 
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -1892,7 +1892,7 @@ static int macb_rx_poll(struct napi_struct *napi, int budget)
 		if (macb_rx_pending(queue)) {
 			queue_writel(queue, IDR, bp->rx_intr_mask);
 			macb_queue_isr_clear(bp, queue, MACB_BIT(RCOMP));
-			netdev_vdbg(bp->dev, "poll: packets pending, reschedule\n");
+			netdev_vdbg(bp->netdev, "poll: packets pending, reschedule\n");
 			napi_schedule(napi);
 		}
 	}
@@ -1956,11 +1956,11 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 	rmb(); // ensure txubr_pending is up to date
 	if (queue->txubr_pending) {
 		queue->txubr_pending = false;
-		netdev_vdbg(bp->dev, "poll: tx restart\n");
+		netdev_vdbg(bp->netdev, "poll: tx restart\n");
 		macb_tx_restart(queue);
 	}
 
-	netdev_vdbg(bp->dev, "TX poll: queue = %u, work_done = %d, budget = %d\n",
+	netdev_vdbg(bp->netdev, "TX poll: queue = %u, work_done = %d, budget = %d\n",
 		    (unsigned int)(queue - bp->queues), work_done, budget);
 
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -1979,7 +1979,7 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 		if (macb_tx_complete_pending(queue)) {
 			queue_writel(queue, IDR, MACB_BIT(TCOMP));
 			macb_queue_isr_clear(bp, queue, MACB_BIT(TCOMP));
-			netdev_vdbg(bp->dev, "TX poll: packets pending, reschedule\n");
+			netdev_vdbg(bp->netdev, "TX poll: packets pending, reschedule\n");
 			napi_schedule(napi);
 		}
 	}
@@ -1990,7 +1990,7 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 static void macb_hresp_error_task(struct work_struct *work)
 {
 	struct macb *bp = from_work(bp, work, hresp_err_bh_work);
-	struct net_device *dev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	struct macb_queue *queue;
 	unsigned int q;
 	u32 ctrl;
@@ -2004,8 +2004,8 @@ static void macb_hresp_error_task(struct work_struct *work)
 	ctrl &= ~(MACB_BIT(RE) | MACB_BIT(TE));
 	macb_writel(bp, NCR, ctrl);
 
-	netif_tx_stop_all_queues(dev);
-	netif_carrier_off(dev);
+	netif_tx_stop_all_queues(netdev);
+	netif_carrier_off(netdev);
 
 	bp->macbgem_ops.mog_init_rings(bp);
 
@@ -2022,8 +2022,8 @@ static void macb_hresp_error_task(struct work_struct *work)
 	ctrl |= MACB_BIT(RE) | MACB_BIT(TE);
 	macb_writel(bp, NCR, ctrl);
 
-	netif_carrier_on(dev);
-	netif_tx_start_all_queues(dev);
+	netif_carrier_on(netdev);
+	netif_tx_start_all_queues(netdev);
 }
 
 static void macb_wol_interrupt(struct macb_queue *queue, u32 status)
@@ -2032,7 +2032,7 @@ static void macb_wol_interrupt(struct macb_queue *queue, u32 status)
 
 	queue_writel(queue, IDR, MACB_BIT(WOL));
 	macb_writel(bp, WOL, 0);
-	netdev_vdbg(bp->dev, "MACB WoL: queue = %u, isr = 0x%08lx\n",
+	netdev_vdbg(bp->netdev, "MACB WoL: queue = %u, isr = 0x%08lx\n",
 		    (unsigned int)(queue - bp->queues),
 		    (unsigned long)status);
 	macb_queue_isr_clear(bp, queue, MACB_BIT(WOL));
@@ -2045,7 +2045,7 @@ static void gem_wol_interrupt(struct macb_queue *queue, u32 status)
 
 	queue_writel(queue, IDR, GEM_BIT(WOL));
 	gem_writel(bp, WOL, 0);
-	netdev_vdbg(bp->dev, "GEM WoL: queue = %u, isr = 0x%08lx\n",
+	netdev_vdbg(bp->netdev, "GEM WoL: queue = %u, isr = 0x%08lx\n",
 		    (unsigned int)(queue - bp->queues),
 		    (unsigned long)status);
 	macb_queue_isr_clear(bp, queue, GEM_BIT(WOL));
@@ -2055,10 +2055,10 @@ static void gem_wol_interrupt(struct macb_queue *queue, u32 status)
 static int macb_interrupt_misc(struct macb_queue *queue, u32 status)
 {
 	struct macb *bp = queue->bp;
-	struct net_device *dev;
+	struct net_device *netdev;
 	u32 ctrl;
 
-	dev = bp->dev;
+	netdev = bp->netdev;
 
 	if (unlikely(status & (MACB_TX_ERR_FLAGS))) {
 		queue_writel(queue, IDR, MACB_TX_INT_FLAGS);
@@ -2099,7 +2099,7 @@ static int macb_interrupt_misc(struct macb_queue *queue, u32 status)
 
 	if (status & MACB_BIT(HRESP)) {
 		queue_work(system_bh_wq, &bp->hresp_err_bh_work);
-		netdev_err(dev, "DMA bus error: HRESP not OK\n");
+		netdev_err(netdev, "DMA bus error: HRESP not OK\n");
 		macb_queue_isr_clear(bp, queue, MACB_BIT(HRESP));
 	}
 
@@ -2118,7 +2118,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 {
 	struct macb_queue *queue = dev_id;
 	struct macb *bp = queue->bp;
-	struct net_device *dev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	u32 status;
 
 	status = queue_readl(queue, ISR);
@@ -2130,13 +2130,13 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 
 	while (status) {
 		/* close possible race with dev_close */
-		if (unlikely(!netif_running(dev))) {
+		if (unlikely(!netif_running(netdev))) {
 			queue_writel(queue, IDR, -1);
 			macb_queue_isr_clear(bp, queue, -1);
 			break;
 		}
 
-		netdev_vdbg(bp->dev, "queue = %u, isr = 0x%08lx\n",
+		netdev_vdbg(netdev, "queue = %u, isr = 0x%08lx\n",
 			    (unsigned int)(queue - bp->queues),
 			    (unsigned long)status);
 
@@ -2181,16 +2181,16 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 /* Polling receive - used by netconsole and other diagnostic tools
  * to allow network i/o with interrupts disabled.
  */
-static void macb_poll_controller(struct net_device *dev)
+static void macb_poll_controller(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
 
 	local_irq_save(flags);
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
-		macb_interrupt(dev->irq, queue);
+		macb_interrupt(netdev->irq, queue);
 	local_irq_restore(flags);
 }
 #endif
@@ -2277,7 +2277,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 
 	/* Should never happen */
 	if (unlikely(!tx_skb)) {
-		netdev_err(bp->dev, "BUG! empty skb!\n");
+		netdev_err(bp->netdev, "BUG! empty skb!\n");
 		return 0;
 	}
 
@@ -2328,7 +2328,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 		if (i == queue->tx_head) {
 			ctrl |= MACB_BF(TX_LSO, lso_ctrl);
 			ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
-			if ((bp->dev->features & NETIF_F_HW_CSUM) &&
+			if ((bp->netdev->features & NETIF_F_HW_CSUM) &&
 			    skb->ip_summed != CHECKSUM_PARTIAL && !lso_ctrl &&
 			    !ptp_one_step_sync(skb))
 				ctrl |= MACB_BIT(TX_NOCRC);
@@ -2352,7 +2352,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 	return 0;
 
 dma_error:
-	netdev_err(bp->dev, "TX DMA map failed\n");
+	netdev_err(bp->netdev, "TX DMA map failed\n");
 
 	for (i = queue->tx_head; i != tx_head; i++) {
 		tx_skb = macb_tx_skb(queue, i);
@@ -2364,7 +2364,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 }
 
 static netdev_features_t macb_features_check(struct sk_buff *skb,
-					     struct net_device *dev,
+					     struct net_device *netdev,
 					     netdev_features_t features)
 {
 	unsigned int nr_frags, f;
@@ -2416,7 +2416,7 @@ static inline int macb_clear_csum(struct sk_buff *skb)
 	return 0;
 }
 
-static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
+static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *netdev)
 {
 	bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) ||
 		      skb_is_nonlinear(*skb);
@@ -2425,7 +2425,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
 	struct sk_buff *nskb;
 	u32 fcs;
 
-	if (!(ndev->features & NETIF_F_HW_CSUM) ||
+	if (!(netdev->features & NETIF_F_HW_CSUM) ||
 	    !((*skb)->ip_summed != CHECKSUM_PARTIAL) ||
 	    skb_shinfo(*skb)->gso_size || ptp_one_step_sync(*skb))
 		return 0;
@@ -2467,10 +2467,11 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
 	return 0;
 }
 
-static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
+				   struct net_device *netdev)
 {
 	u16 queue_index = skb_get_queue_mapping(skb);
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue = &bp->queues[queue_index];
 	unsigned int desc_cnt, nr_frags, frag_size, f;
 	unsigned int hdrlen;
@@ -2483,7 +2484,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return ret;
 	}
 
-	if (macb_pad_and_fcs(&skb, dev)) {
+	if (macb_pad_and_fcs(&skb, netdev)) {
 		dev_kfree_skb_any(skb);
 		return ret;
 	}
@@ -2502,7 +2503,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		else
 			hdrlen = skb_tcp_all_headers(skb);
 		if (skb_headlen(skb) < hdrlen) {
-			netdev_err(bp->dev, "Error - LSO headers fragmented!!!\n");
+			netdev_err(bp->netdev, "Error - LSO headers fragmented!!!\n");
 			/* if this is required, would need to copy to single buffer */
 			return NETDEV_TX_BUSY;
 		}
@@ -2510,7 +2511,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		hdrlen = umin(skb_headlen(skb), bp->max_tx_length);
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
-	netdev_vdbg(bp->dev,
+	netdev_vdbg(bp->netdev,
 		    "start_xmit: queue %hu len %u head %p data %p tail %p end %p\n",
 		    queue_index, skb->len, skb->head, skb->data,
 		    skb_tail_pointer(skb), skb_end_pointer(skb));
@@ -2538,8 +2539,8 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* This is a hard error, log it. */
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
 		       bp->tx_ring_size) < desc_cnt) {
-		netif_stop_subqueue(dev, queue_index);
-		netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
+		netif_stop_subqueue(netdev, queue_index);
+		netdev_dbg(netdev, "tx_head = %u, tx_tail = %u\n",
 			   queue->tx_head, queue->tx_tail);
 		ret = NETDEV_TX_BUSY;
 		goto unlock;
@@ -2554,7 +2555,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Make newly initialized descriptor visible to hardware */
 	wmb();
 	skb_tx_timestamp(skb);
-	netdev_tx_sent_queue(netdev_get_tx_queue(bp->dev, queue_index),
+	netdev_tx_sent_queue(netdev_get_tx_queue(bp->netdev, queue_index),
 			     skb->len);
 
 	spin_lock(&bp->lock);
@@ -2563,7 +2564,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	spin_unlock(&bp->lock);
 
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
-		netif_stop_subqueue(dev, queue_index);
+		netif_stop_subqueue(netdev, queue_index);
 
 unlock:
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
@@ -2579,7 +2580,7 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 		bp->rx_buffer_size = MIN(size, RX_BUFFER_MAX);
 
 		if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) {
-			netdev_dbg(bp->dev,
+			netdev_dbg(bp->netdev,
 				   "RX buffer must be multiple of %d bytes, expanding\n",
 				   RX_BUFFER_MULTIPLE);
 			bp->rx_buffer_size =
@@ -2587,8 +2588,8 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 		}
 	}
 
-	netdev_dbg(bp->dev, "mtu [%u] rx_buffer_size [%zu]\n",
-		   bp->dev->mtu, bp->rx_buffer_size);
+	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%zu]\n",
+		   bp->netdev->mtu, bp->rx_buffer_size);
 }
 
 static void gem_free_rx_buffers(struct macb *bp)
@@ -2687,7 +2688,7 @@ static int gem_alloc_rx_buffers(struct macb *bp)
 		if (!queue->rx_skbuff)
 			return -ENOMEM;
 		else
-			netdev_dbg(bp->dev,
+			netdev_dbg(bp->netdev,
 				   "Allocated %d RX struct sk_buff entries at %p\n",
 				   bp->rx_ring_size, queue->rx_skbuff);
 	}
@@ -2705,7 +2706,7 @@ static int macb_alloc_rx_buffers(struct macb *bp)
 	if (!queue->rx_buffers)
 		return -ENOMEM;
 
-	netdev_dbg(bp->dev,
+	netdev_dbg(bp->netdev,
 		   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
 		   size, (unsigned long)queue->rx_buffers_dma, queue->rx_buffers);
 	return 0;
@@ -2731,14 +2732,14 @@ static int macb_alloc_consistent(struct macb *bp)
 	tx = dma_alloc_coherent(dev, size, &tx_dma, GFP_KERNEL);
 	if (!tx || upper_32_bits(tx_dma) != upper_32_bits(tx_dma + size - 1))
 		goto out_err;
-	netdev_dbg(bp->dev, "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n",
+	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n",
 		   size, bp->num_queues, (unsigned long)tx_dma, tx);
 
 	size = bp->num_queues * macb_rx_ring_size_per_queue(bp);
 	rx = dma_alloc_coherent(dev, size, &rx_dma, GFP_KERNEL);
 	if (!rx || upper_32_bits(rx_dma) != upper_32_bits(rx_dma + size - 1))
 		goto out_err;
-	netdev_dbg(bp->dev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
+	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
 		   size, bp->num_queues, (unsigned long)rx_dma, rx);
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
@@ -2966,7 +2967,7 @@ static void macb_configure_dma(struct macb *bp)
 		else
 			dmacfg |= GEM_BIT(ENDIA_DESC); /* CPU in big endian */
 
-		if (bp->dev->features & NETIF_F_HW_CSUM)
+		if (bp->netdev->features & NETIF_F_HW_CSUM)
 			dmacfg |= GEM_BIT(TXCOEN);
 		else
 			dmacfg &= ~GEM_BIT(TXCOEN);
@@ -2976,7 +2977,7 @@ static void macb_configure_dma(struct macb *bp)
 			dmacfg |= GEM_BIT(ADDR64);
 		if (macb_dma_ptp(bp))
 			dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT);
-		netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
+		netdev_dbg(bp->netdev, "Cadence configure DMA with 0x%08x\n",
 			   dmacfg);
 		gem_writel(bp, DMACFG, dmacfg);
 	}
@@ -3000,11 +3001,11 @@ static void macb_init_hw(struct macb *bp)
 		config |= MACB_BIT(JFRAME);	/* Enable jumbo frames */
 	else
 		config |= MACB_BIT(BIG);	/* Receive oversized frames */
-	if (bp->dev->flags & IFF_PROMISC)
+	if (bp->netdev->flags & IFF_PROMISC)
 		config |= MACB_BIT(CAF);	/* Copy All Frames */
-	else if (macb_is_gem(bp) && bp->dev->features & NETIF_F_RXCSUM)
+	else if (macb_is_gem(bp) && bp->netdev->features & NETIF_F_RXCSUM)
 		config |= GEM_BIT(RXCOEN);
-	if (!(bp->dev->flags & IFF_BROADCAST))
+	if (!(bp->netdev->flags & IFF_BROADCAST))
 		config |= MACB_BIT(NBC);	/* No BroadCast */
 	config |= macb_dbw(bp);
 	macb_writel(bp, NCFGR, config);
@@ -3078,17 +3079,17 @@ static int hash_get_index(__u8 *addr)
 }
 
 /* Add multicast addresses to the internal multicast-hash table. */
-static void macb_sethashtable(struct net_device *dev)
+static void macb_sethashtable(struct net_device *netdev)
 {
 	struct netdev_hw_addr *ha;
 	unsigned long mc_filter[2];
 	unsigned int bitnr;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	mc_filter[0] = 0;
 	mc_filter[1] = 0;
 
-	netdev_for_each_mc_addr(ha, dev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		bitnr = hash_get_index(ha->addr);
 		mc_filter[bitnr >> 5] |= 1 << (bitnr & 31);
 	}
@@ -3098,14 +3099,14 @@ static void macb_sethashtable(struct net_device *dev)
 }
 
 /* Enable/Disable promiscuous and multicast modes. */
-static void macb_set_rx_mode(struct net_device *dev)
+static void macb_set_rx_mode(struct net_device *netdev)
 {
 	unsigned long cfg;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	cfg = macb_readl(bp, NCFGR);
 
-	if (dev->flags & IFF_PROMISC) {
+	if (netdev->flags & IFF_PROMISC) {
 		/* Enable promiscuous mode */
 		cfg |= MACB_BIT(CAF);
 
@@ -3117,20 +3118,20 @@ static void macb_set_rx_mode(struct net_device *dev)
 		cfg &= ~MACB_BIT(CAF);
 
 		/* Enable RX checksum offload only if requested */
-		if (macb_is_gem(bp) && dev->features & NETIF_F_RXCSUM)
+		if (macb_is_gem(bp) && netdev->features & NETIF_F_RXCSUM)
 			cfg |= GEM_BIT(RXCOEN);
 	}
 
-	if (dev->flags & IFF_ALLMULTI) {
+	if (netdev->flags & IFF_ALLMULTI) {
 		/* Enable all multicast mode */
 		macb_or_gem_writel(bp, HRB, -1);
 		macb_or_gem_writel(bp, HRT, -1);
 		cfg |= MACB_BIT(NCFGR_MTI);
-	} else if (!netdev_mc_empty(dev)) {
+	} else if (!netdev_mc_empty(netdev)) {
 		/* Enable specific multicasts */
-		macb_sethashtable(dev);
+		macb_sethashtable(netdev);
 		cfg |= MACB_BIT(NCFGR_MTI);
-	} else if (dev->flags & (~IFF_ALLMULTI)) {
+	} else if (netdev->flags & (~IFF_ALLMULTI)) {
 		/* Disable all multicast mode */
 		macb_or_gem_writel(bp, HRB, 0);
 		macb_or_gem_writel(bp, HRT, 0);
@@ -3140,15 +3141,15 @@ static void macb_set_rx_mode(struct net_device *dev)
 	macb_writel(bp, NCFGR, cfg);
 }
 
-static int macb_open(struct net_device *dev)
+static int macb_open(struct net_device *netdev)
 {
-	size_t bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
-	struct macb *bp = netdev_priv(dev);
+	size_t bufsz = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int q;
 	int err;
 
-	netdev_dbg(bp->dev, "open\n");
+	netdev_dbg(bp->netdev, "open\n");
 
 	err = pm_runtime_resume_and_get(&bp->pdev->dev);
 	if (err < 0)
@@ -3159,7 +3160,7 @@ static int macb_open(struct net_device *dev)
 
 	err = macb_alloc_consistent(bp);
 	if (err) {
-		netdev_err(dev, "Unable to allocate DMA memory (error %d)\n",
+		netdev_err(netdev, "Unable to allocate DMA memory (error %d)\n",
 			   err);
 		goto pm_exit;
 	}
@@ -3186,10 +3187,10 @@ static int macb_open(struct net_device *dev)
 	if (err)
 		goto phy_off;
 
-	netif_tx_start_all_queues(dev);
+	netif_tx_start_all_queues(netdev);
 
 	if (bp->ptp_info)
-		bp->ptp_info->ptp_init(dev);
+		bp->ptp_info->ptp_init(netdev);
 
 	return 0;
 
@@ -3208,19 +3209,19 @@ static int macb_open(struct net_device *dev)
 	return err;
 }
 
-static int macb_close(struct net_device *dev)
+static int macb_close(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
 
-	netif_tx_stop_all_queues(dev);
+	netif_tx_stop_all_queues(netdev);
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		napi_disable(&queue->napi_rx);
 		napi_disable(&queue->napi_tx);
-		netdev_tx_reset_queue(netdev_get_tx_queue(dev, q));
+		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, q));
 	}
 
 	cancel_delayed_work_sync(&bp->tx_lpi_work);
@@ -3232,38 +3233,38 @@ static int macb_close(struct net_device *dev)
 
 	spin_lock_irqsave(&bp->lock, flags);
 	macb_reset_hw(bp);
-	netif_carrier_off(dev);
+	netif_carrier_off(netdev);
 	spin_unlock_irqrestore(&bp->lock, flags);
 
 	macb_free_consistent(bp);
 
 	if (bp->ptp_info)
-		bp->ptp_info->ptp_remove(dev);
+		bp->ptp_info->ptp_remove(netdev);
 
 	pm_runtime_put(&bp->pdev->dev);
 
 	return 0;
 }
 
-static int macb_change_mtu(struct net_device *dev, int new_mtu)
+static int macb_change_mtu(struct net_device *netdev, int new_mtu)
 {
-	if (netif_running(dev))
+	if (netif_running(netdev))
 		return -EBUSY;
 
-	WRITE_ONCE(dev->mtu, new_mtu);
+	WRITE_ONCE(netdev->mtu, new_mtu);
 
 	return 0;
 }
 
-static int macb_set_mac_addr(struct net_device *dev, void *addr)
+static int macb_set_mac_addr(struct net_device *netdev, void *addr)
 {
 	int err;
 
-	err = eth_mac_addr(dev, addr);
+	err = eth_mac_addr(netdev, addr);
 	if (err < 0)
 		return err;
 
-	macb_set_hwaddr(netdev_priv(dev));
+	macb_set_hwaddr(netdev_priv(netdev));
 	return 0;
 }
 
@@ -3301,7 +3302,7 @@ static void gem_get_stats(struct macb *bp, struct rtnl_link_stats64 *nstat)
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
-	if (netif_running(bp->dev))
+	if (netif_running(bp->netdev))
 		gem_update_stats(bp);
 
 	nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors +
@@ -3334,10 +3335,10 @@ static void gem_get_stats(struct macb *bp, struct rtnl_link_stats64 *nstat)
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_ethtool_stats(struct net_device *dev,
+static void gem_get_ethtool_stats(struct net_device *netdev,
 				  struct ethtool_stats *stats, u64 *data)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	spin_lock_irq(&bp->stats_lock);
 	gem_update_stats(bp);
@@ -3346,9 +3347,9 @@ static void gem_get_ethtool_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static int gem_get_sset_count(struct net_device *dev, int sset)
+static int gem_get_sset_count(struct net_device *netdev, int sset)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	switch (sset) {
 	case ETH_SS_STATS:
@@ -3358,9 +3359,9 @@ static int gem_get_sset_count(struct net_device *dev, int sset)
 	}
 }
 
-static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p)
+static void gem_get_ethtool_strings(struct net_device *netdev, u32 sset, u8 *p)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int i;
 	unsigned int q;
@@ -3379,13 +3380,13 @@ static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p)
 	}
 }
 
-static void macb_get_stats(struct net_device *dev,
+static void macb_get_stats(struct net_device *netdev,
 			   struct rtnl_link_stats64 *nstat)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
-	netdev_stats_to_stats64(nstat, &bp->dev->stats);
+	netdev_stats_to_stats64(nstat, &bp->netdev->stats);
 	if (macb_is_gem(bp)) {
 		gem_get_stats(bp, nstat);
 		return;
@@ -3429,10 +3430,10 @@ static void macb_get_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void macb_get_pause_stats(struct net_device *dev,
+static void macb_get_pause_stats(struct net_device *netdev,
 				 struct ethtool_pause_stats *pause_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3442,10 +3443,10 @@ static void macb_get_pause_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_pause_stats(struct net_device *dev,
+static void gem_get_pause_stats(struct net_device *netdev,
 				struct ethtool_pause_stats *pause_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3455,10 +3456,10 @@ static void gem_get_pause_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void macb_get_eth_mac_stats(struct net_device *dev,
+static void macb_get_eth_mac_stats(struct net_device *netdev,
 				   struct ethtool_eth_mac_stats *mac_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3480,10 +3481,10 @@ static void macb_get_eth_mac_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_eth_mac_stats(struct net_device *dev,
+static void gem_get_eth_mac_stats(struct net_device *netdev,
 				  struct ethtool_eth_mac_stats *mac_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3513,10 +3514,10 @@ static void gem_get_eth_mac_stats(struct net_device *dev,
 }
 
 /* TODO: Report SQE test errors when added to phy_stats */
-static void macb_get_eth_phy_stats(struct net_device *dev,
+static void macb_get_eth_phy_stats(struct net_device *netdev,
 				   struct ethtool_eth_phy_stats *phy_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3525,10 +3526,10 @@ static void macb_get_eth_phy_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_eth_phy_stats(struct net_device *dev,
+static void gem_get_eth_phy_stats(struct net_device *netdev,
 				  struct ethtool_eth_phy_stats *phy_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3537,11 +3538,11 @@ static void gem_get_eth_phy_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void macb_get_rmon_stats(struct net_device *dev,
+static void macb_get_rmon_stats(struct net_device *netdev,
 				struct ethtool_rmon_stats *rmon_stats,
 				const struct ethtool_rmon_hist_range **ranges)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3563,11 +3564,11 @@ static const struct ethtool_rmon_hist_range gem_rmon_ranges[] = {
 	{ },
 };
 
-static void gem_get_rmon_stats(struct net_device *dev,
+static void gem_get_rmon_stats(struct net_device *netdev,
 			       struct ethtool_rmon_stats *rmon_stats,
 			       const struct ethtool_rmon_hist_range **ranges)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3598,10 +3599,10 @@ static int macb_get_regs_len(struct net_device *netdev)
 	return MACB_GREGS_NBR * sizeof(u32);
 }
 
-static void macb_get_regs(struct net_device *dev, struct ethtool_regs *regs,
+static void macb_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 			  void *p)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned int tail, head;
 	u32 *regs_buff = p;
 
@@ -3718,16 +3719,16 @@ static int macb_set_ringparam(struct net_device *netdev,
 		return 0;
 	}
 
-	if (netif_running(bp->dev)) {
+	if (netif_running(bp->netdev)) {
 		reset = 1;
-		macb_close(bp->dev);
+		macb_close(bp->netdev);
 	}
 
 	bp->rx_ring_size = new_rx_size;
 	bp->tx_ring_size = new_tx_size;
 
 	if (reset)
-		macb_open(bp->dev);
+		macb_open(bp->netdev);
 
 	return 0;
 }
@@ -3754,13 +3755,13 @@ static s32 gem_get_ptp_max_adj(void)
 	return 64000000;
 }
 
-static int gem_get_ts_info(struct net_device *dev,
+static int gem_get_ts_info(struct net_device *netdev,
 			   struct kernel_ethtool_ts_info *info)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	if (!macb_dma_ptp(bp)) {
-		ethtool_op_get_ts_info(dev, info);
+		ethtool_op_get_ts_info(netdev, info);
 		return 0;
 	}
 
@@ -3807,7 +3808,7 @@ static int macb_get_ts_info(struct net_device *netdev,
 
 static void gem_enable_flow_filters(struct macb *bp, bool enable)
 {
-	struct net_device *netdev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	struct ethtool_rx_fs_item *item;
 	u32 t2_scr;
 	int num_t2_scr;
@@ -4137,16 +4138,16 @@ static const struct ethtool_ops macb_ethtool_ops = {
 	.set_ringparam		= macb_set_ringparam,
 };
 
-static int macb_get_eee(struct net_device *dev, struct ethtool_keee *eee)
+static int macb_get_eee(struct net_device *netdev, struct ethtool_keee *eee)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	return phylink_ethtool_get_eee(bp->phylink, eee);
 }
 
-static int macb_set_eee(struct net_device *dev, struct ethtool_keee *eee)
+static int macb_set_eee(struct net_device *netdev, struct ethtool_keee *eee)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	return phylink_ethtool_set_eee(bp->phylink, eee);
 }
@@ -4177,43 +4178,43 @@ static const struct ethtool_ops gem_ethtool_ops = {
 	.set_eee		= macb_set_eee,
 };
 
-static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int macb_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
-	if (!netif_running(dev))
+	if (!netif_running(netdev))
 		return -EINVAL;
 
 	return phylink_mii_ioctl(bp->phylink, rq, cmd);
 }
 
-static int macb_hwtstamp_get(struct net_device *dev,
+static int macb_hwtstamp_get(struct net_device *netdev,
 			     struct kernel_hwtstamp_config *cfg)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
-	if (!netif_running(dev))
+	if (!netif_running(netdev))
 		return -EINVAL;
 
 	if (!bp->ptp_info)
 		return -EOPNOTSUPP;
 
-	return bp->ptp_info->get_hwtst(dev, cfg);
+	return bp->ptp_info->get_hwtst(netdev, cfg);
 }
 
-static int macb_hwtstamp_set(struct net_device *dev,
+static int macb_hwtstamp_set(struct net_device *netdev,
 			     struct kernel_hwtstamp_config *cfg,
 			     struct netlink_ext_ack *extack)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
-	if (!netif_running(dev))
+	if (!netif_running(netdev))
 		return -EINVAL;
 
 	if (!bp->ptp_info)
 		return -EOPNOTSUPP;
 
-	return bp->ptp_info->set_hwtst(dev, cfg, extack);
+	return bp->ptp_info->set_hwtst(netdev, cfg, extack);
 }
 
 static inline void macb_set_txcsum_feature(struct macb *bp,
@@ -4236,7 +4237,7 @@ static inline void macb_set_txcsum_feature(struct macb *bp,
 static inline void macb_set_rxcsum_feature(struct macb *bp,
 					   netdev_features_t features)
 {
-	struct net_device *netdev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	u32 val;
 
 	if (!macb_is_gem(bp))
@@ -4283,7 +4284,7 @@ static int macb_set_features(struct net_device *netdev,
 
 static void macb_restore_features(struct macb *bp)
 {
-	struct net_device *netdev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	netdev_features_t features = netdev->features;
 	struct ethtool_rx_fs_item *item;
 
@@ -4300,14 +4301,14 @@ static void macb_restore_features(struct macb *bp)
 	macb_set_rxflow_feature(bp, features);
 }
 
-static int macb_taprio_setup_replace(struct net_device *ndev,
+static int macb_taprio_setup_replace(struct net_device *netdev,
 				     struct tc_taprio_qopt_offload *conf)
 {
 	u64 total_on_time = 0, start_time_sec = 0, start_time = conf->base_time;
 	u32 configured_queues = 0, speed = 0, start_time_nsec;
 	struct macb_queue_enst_config *enst_queue;
 	struct tc_taprio_sched_entry *entry;
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 	struct ethtool_link_ksettings kset;
 	struct macb_queue *queue;
 	u32 queue_mask;
@@ -4316,13 +4317,13 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	int err;
 
 	if (conf->num_entries > bp->num_queues) {
-		netdev_err(ndev, "Too many TAPRIO entries: %zu > %d queues\n",
+		netdev_err(netdev, "Too many TAPRIO entries: %zu > %d queues\n",
 			   conf->num_entries, bp->num_queues);
 		return -EINVAL;
 	}
 
 	if (conf->base_time < 0) {
-		netdev_err(ndev, "Invalid base_time: must be 0 or positive, got %lld\n",
+		netdev_err(netdev, "Invalid base_time: must be 0 or positive, got %lld\n",
 			   conf->base_time);
 		return -ERANGE;
 	}
@@ -4330,13 +4331,13 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	/* Get the current link speed */
 	err = phylink_ethtool_ksettings_get(bp->phylink, &kset);
 	if (unlikely(err)) {
-		netdev_err(ndev, "Failed to get link settings: %d\n", err);
+		netdev_err(netdev, "Failed to get link settings: %d\n", err);
 		return err;
 	}
 
 	speed = kset.base.speed;
 	if (unlikely(speed <= 0)) {
-		netdev_err(ndev, "Invalid speed: %d\n", speed);
+		netdev_err(netdev, "Invalid speed: %d\n", speed);
 		return -EINVAL;
 	}
 
@@ -4349,7 +4350,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		entry = &conf->entries[i];
 
 		if (entry->command != TC_TAPRIO_CMD_SET_GATES) {
-			netdev_err(ndev, "Entry %zu: unsupported command %d\n",
+			netdev_err(netdev, "Entry %zu: unsupported command %d\n",
 				   i, entry->command);
 			err = -EOPNOTSUPP;
 			goto cleanup;
@@ -4357,7 +4358,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* Validate gate_mask: must be nonzero, single queue, and within range */
 		if (!is_power_of_2(entry->gate_mask)) {
-			netdev_err(ndev, "Entry %zu: gate_mask 0x%x is not a power of 2 (only one queue per entry allowed)\n",
+			netdev_err(netdev, "Entry %zu: gate_mask 0x%x is not a power of 2 (only one queue per entry allowed)\n",
 				   i, entry->gate_mask);
 			err = -EINVAL;
 			goto cleanup;
@@ -4366,7 +4367,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		/* gate_mask must not select queues outside the valid queues */
 		queue_id = order_base_2(entry->gate_mask);
 		if (queue_id >= bp->num_queues) {
-			netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
+			netdev_err(netdev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
 				   i, entry->gate_mask, bp->num_queues);
 			err = -EINVAL;
 			goto cleanup;
@@ -4376,7 +4377,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		start_time_sec = start_time;
 		start_time_nsec = do_div(start_time_sec, NSEC_PER_SEC);
 		if (start_time_sec > GENMASK(GEM_START_TIME_SEC_SIZE - 1, 0)) {
-			netdev_err(ndev, "Entry %zu: Start time %llu s exceeds hardware limit\n",
+			netdev_err(netdev, "Entry %zu: Start time %llu s exceeds hardware limit\n",
 				   i, start_time_sec);
 			err = -ERANGE;
 			goto cleanup;
@@ -4384,7 +4385,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* Check for on time limit */
 		if (entry->interval > enst_max_hw_interval(speed)) {
-			netdev_err(ndev, "Entry %zu: interval %u ns exceeds hardware limit %llu ns\n",
+			netdev_err(netdev, "Entry %zu: interval %u ns exceeds hardware limit %llu ns\n",
 				   i, entry->interval, enst_max_hw_interval(speed));
 			err = -ERANGE;
 			goto cleanup;
@@ -4392,7 +4393,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* Check for off time limit*/
 		if ((conf->cycle_time - entry->interval) > enst_max_hw_interval(speed)) {
-			netdev_err(ndev, "Entry %zu: off_time %llu ns exceeds hardware limit %llu ns\n",
+			netdev_err(netdev, "Entry %zu: off_time %llu ns exceeds hardware limit %llu ns\n",
 				   i, conf->cycle_time - entry->interval,
 				   enst_max_hw_interval(speed));
 			err = -ERANGE;
@@ -4415,13 +4416,13 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 	/* Check total interval doesn't exceed cycle time */
 	if (total_on_time > conf->cycle_time) {
-		netdev_err(ndev, "Total ON %llu ns exceeds cycle time %llu ns\n",
+		netdev_err(netdev, "Total ON %llu ns exceeds cycle time %llu ns\n",
 			   total_on_time, conf->cycle_time);
 		err = -EINVAL;
 		goto cleanup;
 	}
 
-	netdev_dbg(ndev, "TAPRIO setup: %zu entries, base_time=%lld ns, cycle_time=%llu ns\n",
+	netdev_dbg(netdev, "TAPRIO setup: %zu entries, base_time=%lld ns, cycle_time=%llu ns\n",
 		   conf->num_entries, conf->base_time, conf->cycle_time);
 
 	/* All validations passed - proceed with hardware configuration */
@@ -4446,7 +4447,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		gem_writel(bp, ENST_CONTROL, configured_queues);
 	}
 
-	netdev_info(ndev, "TAPRIO configuration completed successfully: %zu entries, %d queues configured\n",
+	netdev_info(netdev, "TAPRIO configuration completed successfully: %zu entries, %d queues configured\n",
 		    conf->num_entries, hweight32(configured_queues));
 
 cleanup:
@@ -4454,14 +4455,14 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	return err;
 }
 
-static void macb_taprio_destroy(struct net_device *ndev)
+static void macb_taprio_destroy(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	u32 queue_mask;
 	unsigned int q;
 
-	netdev_reset_tc(ndev);
+	netdev_reset_tc(netdev);
 	queue_mask = BIT_U32(bp->num_queues) - 1;
 
 	scoped_guard(spinlock_irqsave, &bp->lock) {
@@ -4476,30 +4477,30 @@ static void macb_taprio_destroy(struct net_device *ndev)
 			queue_writel(queue, ENST_OFF_TIME, 0);
 		}
 	}
-	netdev_info(ndev, "TAPRIO destroy: All gates disabled\n");
+	netdev_info(netdev, "TAPRIO destroy: All gates disabled\n");
 }
 
-static int macb_setup_taprio(struct net_device *ndev,
+static int macb_setup_taprio(struct net_device *netdev,
 			     struct tc_taprio_qopt_offload *taprio)
 {
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 	int err = 0;
 
-	if (unlikely(!(ndev->hw_features & NETIF_F_HW_TC)))
+	if (unlikely(!(netdev->hw_features & NETIF_F_HW_TC)))
 		return -EOPNOTSUPP;
 
 	/* Check if Device is in runtime suspend */
 	if (unlikely(pm_runtime_suspended(&bp->pdev->dev))) {
-		netdev_err(ndev, "Device is in runtime suspend\n");
+		netdev_err(netdev, "Device is in runtime suspend\n");
 		return -EOPNOTSUPP;
 	}
 
 	switch (taprio->cmd) {
 	case TAPRIO_CMD_REPLACE:
-		err = macb_taprio_setup_replace(ndev, taprio);
+		err = macb_taprio_setup_replace(netdev, taprio);
 		break;
 	case TAPRIO_CMD_DESTROY:
-		macb_taprio_destroy(ndev);
+		macb_taprio_destroy(netdev);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -4508,15 +4509,15 @@ static int macb_setup_taprio(struct net_device *ndev,
 	return err;
 }
 
-static int macb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+static int macb_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 			 void *type_data)
 {
-	if (!dev || !type_data)
+	if (!netdev || !type_data)
 		return -EINVAL;
 
 	switch (type) {
 	case TC_SETUP_QDISC_TAPRIO:
-		return macb_setup_taprio(dev, type_data);
+		return macb_setup_taprio(netdev, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -4724,9 +4725,9 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
 
 static int macb_init_dflt(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
+	struct net_device *netdev = platform_get_drvdata(pdev);
 	unsigned int hw_q, q;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	int err;
 	u32 val, reg;
@@ -4742,8 +4743,8 @@ static int macb_init_dflt(struct platform_device *pdev)
 		queue = &bp->queues[q];
 		queue->bp = bp;
 		spin_lock_init(&queue->tx_ptr_lock);
-		netif_napi_add(dev, &queue->napi_rx, macb_rx_poll);
-		netif_napi_add_tx(dev, &queue->napi_tx, macb_tx_poll);
+		netif_napi_add(netdev, &queue->napi_rx, macb_rx_poll);
+		netif_napi_add_tx(netdev, &queue->napi_tx, macb_tx_poll);
 		if (hw_q) {
 			queue->ISR  = GEM_ISR(hw_q - 1);
 			queue->IER  = GEM_IER(hw_q - 1);
@@ -4773,7 +4774,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 		 */
 		queue->irq = platform_get_irq(pdev, q);
 		err = devm_request_irq(&pdev->dev, queue->irq, macb_interrupt,
-				       IRQF_SHARED, dev->name, queue);
+				       IRQF_SHARED, netdev->name, queue);
 		if (err) {
 			dev_err(&pdev->dev,
 				"Unable to request IRQ %d (error %d)\n",
@@ -4785,7 +4786,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 		q++;
 	}
 
-	dev->netdev_ops = &macb_netdev_ops;
+	netdev->netdev_ops = &macb_netdev_ops;
 
 	/* setup appropriated routines according to adapter type */
 	if (macb_is_gem(bp)) {
@@ -4793,39 +4794,39 @@ static int macb_init_dflt(struct platform_device *pdev)
 		bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers;
 		bp->macbgem_ops.mog_init_rings = gem_init_rings;
 		bp->macbgem_ops.mog_rx = gem_rx;
-		dev->ethtool_ops = &gem_ethtool_ops;
+		netdev->ethtool_ops = &gem_ethtool_ops;
 	} else {
 		bp->macbgem_ops.mog_alloc_rx_buffers = macb_alloc_rx_buffers;
 		bp->macbgem_ops.mog_free_rx_buffers = macb_free_rx_buffers;
 		bp->macbgem_ops.mog_init_rings = macb_init_rings;
 		bp->macbgem_ops.mog_rx = macb_rx;
-		dev->ethtool_ops = &macb_ethtool_ops;
+		netdev->ethtool_ops = &macb_ethtool_ops;
 	}
 
-	netdev_sw_irq_coalesce_default_on(dev);
+	netdev_sw_irq_coalesce_default_on(netdev);
 
-	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
 	/* Set features */
-	dev->hw_features = NETIF_F_SG;
+	netdev->hw_features = NETIF_F_SG;
 
 	/* Check LSO capability; runtime detection can be overridden by a cap
 	 * flag if the hardware is known to be buggy
 	 */
 	if (!(bp->caps & MACB_CAPS_NO_LSO) &&
 	    GEM_BFEXT(PBUF_LSO, gem_readl(bp, DCFG6)))
-		dev->hw_features |= MACB_NETIF_LSO;
+		netdev->hw_features |= MACB_NETIF_LSO;
 
 	/* Checksum offload is only available on gem with packet buffer */
 	if (macb_is_gem(bp) && !(bp->caps & MACB_CAPS_FIFO_MODE))
-		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+		netdev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	if (bp->caps & MACB_CAPS_SG_DISABLED)
-		dev->hw_features &= ~NETIF_F_SG;
+		netdev->hw_features &= ~NETIF_F_SG;
 	/* Enable HW_TC if hardware supports QBV */
 	if (bp->caps & MACB_CAPS_QBV)
-		dev->hw_features |= NETIF_F_HW_TC;
+		netdev->hw_features |= NETIF_F_HW_TC;
 
-	dev->features = dev->hw_features;
+	netdev->features = netdev->hw_features;
 
 	/* Check RX Flow Filters support.
 	 * Max Rx flows set by availability of screeners & compare regs:
@@ -4843,7 +4844,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 			reg = GEM_BFINS(ETHTCMP, (uint16_t)ETH_P_IP, reg);
 			gem_writel_n(bp, ETHT, SCRT2_ETHT, reg);
 			/* Filtering is supported in hw but don't enable it in kernel now */
-			dev->hw_features |= NETIF_F_NTUPLE;
+			netdev->hw_features |= NETIF_F_NTUPLE;
 			/* init Rx flow definitions */
 			bp->rx_fs_list.count = 0;
 			spin_lock_init(&bp->rx_fs_lock);
@@ -5053,9 +5054,9 @@ static void at91ether_stop(struct macb *lp)
 }
 
 /* Open the ethernet interface */
-static int at91ether_open(struct net_device *dev)
+static int at91ether_open(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 	u32 ctl;
 	int ret;
 
@@ -5077,7 +5078,7 @@ static int at91ether_open(struct net_device *dev)
 	if (ret)
 		goto stop;
 
-	netif_start_queue(dev);
+	netif_start_queue(netdev);
 
 	return 0;
 
@@ -5089,11 +5090,11 @@ static int at91ether_open(struct net_device *dev)
 }
 
 /* Close the interface */
-static int at91ether_close(struct net_device *dev)
+static int at91ether_close(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 
-	netif_stop_queue(dev);
+	netif_stop_queue(netdev);
 
 	phylink_stop(lp->phylink);
 	phylink_disconnect_phy(lp->phylink);
@@ -5107,14 +5108,14 @@ static int at91ether_close(struct net_device *dev)
 
 /* Transmit packet */
 static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
-					struct net_device *dev)
+					struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 
 	if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) {
 		int desc = 0;
 
-		netif_stop_queue(dev);
+		netif_stop_queue(netdev);
 
 		/* Store packet information (to free when Tx completed) */
 		lp->rm9200_txq[desc].skb = skb;
@@ -5123,8 +5124,8 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 							      skb->len, DMA_TO_DEVICE);
 		if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) {
 			dev_kfree_skb_any(skb);
-			dev->stats.tx_dropped++;
-			netdev_err(dev, "%s: DMA mapping error\n", __func__);
+			netdev->stats.tx_dropped++;
+			netdev_err(netdev, "%s: DMA mapping error\n", __func__);
 			return NETDEV_TX_OK;
 		}
 
@@ -5134,7 +5135,8 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 		macb_writel(lp, TCR, skb->len);
 
 	} else {
-		netdev_err(dev, "%s called, but device is busy!\n", __func__);
+		netdev_err(netdev, "%s called, but device is busy!\n",
+			   __func__);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -5144,9 +5146,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 /* Extract received frame from buffer descriptors and sent to upper layers.
  * (Called from interrupt context)
  */
-static void at91ether_rx(struct net_device *dev)
+static void at91ether_rx(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 	struct macb_queue *q = &lp->queues[0];
 	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
@@ -5157,21 +5159,21 @@ static void at91ether_rx(struct net_device *dev)
 	while (desc->addr & MACB_BIT(RX_USED)) {
 		p_recv = q->rx_buffers + q->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
 		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
-		skb = netdev_alloc_skb(dev, pktlen + 2);
+		skb = netdev_alloc_skb(netdev, pktlen + 2);
 		if (skb) {
 			skb_reserve(skb, 2);
 			skb_put_data(skb, p_recv, pktlen);
 
-			skb->protocol = eth_type_trans(skb, dev);
-			dev->stats.rx_packets++;
-			dev->stats.rx_bytes += pktlen;
+			skb->protocol = eth_type_trans(skb, netdev);
+			netdev->stats.rx_packets++;
+			netdev->stats.rx_bytes += pktlen;
 			netif_rx(skb);
 		} else {
-			dev->stats.rx_dropped++;
+			netdev->stats.rx_dropped++;
 		}
 
 		if (desc->ctrl & MACB_BIT(RX_MHASH_MATCH))
-			dev->stats.multicast++;
+			netdev->stats.multicast++;
 
 		/* reset ownership bit */
 		desc->addr &= ~MACB_BIT(RX_USED);
@@ -5189,8 +5191,8 @@ static void at91ether_rx(struct net_device *dev)
 /* MAC interrupt handler */
 static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 {
-	struct net_device *dev = dev_id;
-	struct macb *lp = netdev_priv(dev);
+	struct net_device *netdev = dev_id;
+	struct macb *lp = netdev_priv(netdev);
 	u32 intstatus, ctl;
 	unsigned int desc;
 
@@ -5201,13 +5203,13 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 
 	/* Receive complete */
 	if (intstatus & MACB_BIT(RCOMP))
-		at91ether_rx(dev);
+		at91ether_rx(netdev);
 
 	/* Transmit complete */
 	if (intstatus & MACB_BIT(TCOMP)) {
 		/* The TCOM bit is set even if the transmission failed */
 		if (intstatus & (MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE)))
-			dev->stats.tx_errors++;
+			netdev->stats.tx_errors++;
 
 		desc = 0;
 		if (lp->rm9200_txq[desc].skb) {
@@ -5215,10 +5217,10 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 			lp->rm9200_txq[desc].skb = NULL;
 			dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping,
 					 lp->rm9200_txq[desc].size, DMA_TO_DEVICE);
-			dev->stats.tx_packets++;
-			dev->stats.tx_bytes += lp->rm9200_txq[desc].size;
+			netdev->stats.tx_packets++;
+			netdev->stats.tx_bytes += lp->rm9200_txq[desc].size;
 		}
-		netif_wake_queue(dev);
+		netif_wake_queue(netdev);
 	}
 
 	/* Work-around for EMAC Errata section 41.3.1 */
@@ -5230,18 +5232,18 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 	}
 
 	if (intstatus & MACB_BIT(ISR_ROVR))
-		netdev_err(dev, "ROVR error\n");
+		netdev_err(netdev, "ROVR error\n");
 
 	return IRQ_HANDLED;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static void at91ether_poll_controller(struct net_device *dev)
+static void at91ether_poll_controller(struct net_device *netdev)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	at91ether_interrupt(dev->irq, dev);
+	at91ether_interrupt(netdev->irq, netdev);
 	local_irq_restore(flags);
 }
 #endif
@@ -5288,17 +5290,17 @@ static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk,
 
 static int at91ether_init(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct macb *bp = netdev_priv(dev);
+	struct net_device *netdev = platform_get_drvdata(pdev);
+	struct macb *bp = netdev_priv(netdev);
 	int err;
 
 	bp->queues[0].bp = bp;
 
-	dev->netdev_ops = &at91ether_netdev_ops;
-	dev->ethtool_ops = &macb_ethtool_ops;
+	netdev->netdev_ops = &at91ether_netdev_ops;
+	netdev->ethtool_ops = &macb_ethtool_ops;
 
-	err = devm_request_irq(&pdev->dev, dev->irq, at91ether_interrupt,
-			       0, dev->name, dev);
+	err = devm_request_irq(&pdev->dev, netdev->irq, at91ether_interrupt,
+			       0, netdev->name, netdev);
 	if (err)
 		return err;
 
@@ -5427,8 +5429,8 @@ static int fu540_c000_init(struct platform_device *pdev)
 
 static int init_reset_optional(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct macb *bp = netdev_priv(dev);
+	struct net_device *netdev = platform_get_drvdata(pdev);
+	struct macb *bp = netdev_priv(netdev);
 	int ret;
 
 	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) {
@@ -5736,7 +5738,7 @@ static int macb_probe(struct platform_device *pdev)
 	const struct macb_config *macb_config;
 	struct clk *tsu_clk = NULL;
 	phy_interface_t interface;
-	struct net_device *dev;
+	struct net_device *netdev;
 	struct resource *regs;
 	u32 wtrmrk_rst_val;
 	void __iomem *mem;
@@ -5771,19 +5773,19 @@ static int macb_probe(struct platform_device *pdev)
 		goto err_disable_clocks;
 	}
 
-	dev = alloc_etherdev_mq(sizeof(*bp), num_queues);
-	if (!dev) {
+	netdev = alloc_etherdev_mq(sizeof(*bp), num_queues);
+	if (!netdev) {
 		err = -ENOMEM;
 		goto err_disable_clocks;
 	}
 
-	dev->base_addr = regs->start;
+	netdev->base_addr = regs->start;
 
-	SET_NETDEV_DEV(dev, &pdev->dev);
+	SET_NETDEV_DEV(netdev, &pdev->dev);
 
-	bp = netdev_priv(dev);
+	bp = netdev_priv(netdev);
 	bp->pdev = pdev;
-	bp->dev = dev;
+	bp->netdev = netdev;
 	bp->regs = mem;
 	bp->native_io = native_io;
 	if (native_io) {
@@ -5856,21 +5858,21 @@ static int macb_probe(struct platform_device *pdev)
 		bp->caps |= MACB_CAPS_DMA_64B;
 	}
 #endif
-	platform_set_drvdata(pdev, dev);
+	platform_set_drvdata(pdev, netdev);
 
-	dev->irq = platform_get_irq(pdev, 0);
-	if (dev->irq < 0) {
-		err = dev->irq;
+	netdev->irq = platform_get_irq(pdev, 0);
+	if (netdev->irq < 0) {
+		err = netdev->irq;
 		goto err_out_free_netdev;
 	}
 
 	/* MTU range: 68 - 1518 or 10240 */
-	dev->min_mtu = GEM_MTU_MIN_SIZE;
+	netdev->min_mtu = GEM_MTU_MIN_SIZE;
 	if ((bp->caps & MACB_CAPS_JUMBO) && bp->jumbo_max_len)
-		dev->max_mtu = MIN(bp->jumbo_max_len, RX_BUFFER_MAX) -
+		netdev->max_mtu = MIN(bp->jumbo_max_len, RX_BUFFER_MAX) -
 				ETH_HLEN - ETH_FCS_LEN;
 	else
-		dev->max_mtu = 1536 - ETH_HLEN - ETH_FCS_LEN;
+		netdev->max_mtu = 1536 - ETH_HLEN - ETH_FCS_LEN;
 
 	if (bp->caps & MACB_CAPS_BD_RD_PREFETCH) {
 		val = GEM_BFEXT(RXBD_RDBUFF, gem_readl(bp, DCFG10));
@@ -5888,7 +5890,7 @@ static int macb_probe(struct platform_device *pdev)
 	if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR)
 		bp->rx_intr_mask |= MACB_BIT(RXUBR);
 
-	err = of_get_ethdev_address(np, bp->dev);
+	err = of_get_ethdev_address(np, bp->netdev);
 	if (err == -EPROBE_DEFER)
 		goto err_out_free_netdev;
 	else if (err)
@@ -5910,9 +5912,9 @@ static int macb_probe(struct platform_device *pdev)
 	if (err)
 		goto err_out_phy_exit;
 
-	netif_carrier_off(dev);
+	netif_carrier_off(netdev);
 
-	err = register_netdev(dev);
+	err = register_netdev(netdev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
 		goto err_out_unregister_mdio;
@@ -5921,9 +5923,9 @@ static int macb_probe(struct platform_device *pdev)
 	INIT_WORK(&bp->hresp_err_bh_work, macb_hresp_error_task);
 	INIT_DELAYED_WORK(&bp->tx_lpi_work, macb_tx_lpi_work_fn);
 
-	netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
+	netdev_info(netdev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
 		    macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID),
-		    dev->base_addr, dev->irq, dev->dev_addr);
+		    netdev->base_addr, netdev->irq, netdev->dev_addr);
 
 	pm_runtime_put_autosuspend(&bp->pdev->dev);
 
@@ -5937,7 +5939,7 @@ static int macb_probe(struct platform_device *pdev)
 	phy_exit(bp->phy);
 
 err_out_free_netdev:
-	free_netdev(dev);
+	free_netdev(netdev);
 
 err_disable_clocks:
 	macb_clks_disable(pclk, hclk, tx_clk, rx_clk, tsu_clk);
@@ -5950,14 +5952,14 @@ static int macb_probe(struct platform_device *pdev)
 
 static void macb_remove(struct platform_device *pdev)
 {
-	struct net_device *dev;
+	struct net_device *netdev;
 	struct macb *bp;
 
-	dev = platform_get_drvdata(pdev);
+	netdev = platform_get_drvdata(pdev);
 
-	if (dev) {
-		bp = netdev_priv(dev);
-		unregister_netdev(dev);
+	if (netdev) {
+		bp = netdev_priv(netdev);
+		unregister_netdev(netdev);
 		phy_exit(bp->phy);
 		mdiobus_unregister(bp->mii_bus);
 		mdiobus_free(bp->mii_bus);
@@ -5969,7 +5971,7 @@ static void macb_remove(struct platform_device *pdev)
 		pm_runtime_dont_use_autosuspend(&pdev->dev);
 		pm_runtime_set_suspended(&pdev->dev);
 		phylink_destroy(bp->phylink);
-		free_netdev(dev);
+		free_netdev(netdev);
 	}
 }
 
@@ -5984,7 +5986,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
 	u32 tmp, ifa_local;
 	unsigned int q;
 
-	if (!device_may_wakeup(&bp->dev->dev))
+	if (!device_may_wakeup(&bp->netdev->dev))
 		phy_exit(bp->phy);
 
 	if (!netif_running(netdev))
@@ -5994,7 +5996,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
 		if (bp->wolopts & WAKE_ARP) {
 			/* Check for IP address in WOL ARP mode */
 			rcu_read_lock();
-			idev = __in_dev_get_rcu(bp->dev);
+			idev = __in_dev_get_rcu(bp->netdev);
 			if (idev)
 				ifa = rcu_dereference(idev->ifa_list);
 			if (!ifa) {
@@ -6096,7 +6098,7 @@ static int __maybe_unused macb_resume(struct device *dev)
 	unsigned long flags;
 	unsigned int q;
 
-	if (!device_may_wakeup(&bp->dev->dev))
+	if (!device_may_wakeup(&bp->netdev->dev))
 		phy_init(bp->phy);
 
 	if (!netif_running(netdev))
diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index b79dec17e6b0..ac009007118f 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -24,48 +24,48 @@
 #define GEM_PCLK_RATE 50000000
 #define GEM_HCLK_RATE 50000000
 
-static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int macb_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
 	int err;
-	struct platform_device *plat_dev;
+	struct platform_device *pdev;
 	struct platform_device_info plat_info;
 	struct macb_platform_data plat_data;
 	struct resource res[2];
 
 	/* enable pci device */
-	err = pcim_enable_device(pdev);
+	err = pcim_enable_device(pci);
 	if (err < 0) {
-		dev_err(&pdev->dev, "Enabling PCI device has failed: %d", err);
+		dev_err(&pci->dev, "Enabling PCI device has failed: %d", err);
 		return err;
 	}
 
-	pci_set_master(pdev);
+	pci_set_master(pci);
 
 	/* set up resources */
 	memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res));
-	res[0].start = pci_resource_start(pdev, 0);
-	res[0].end = pci_resource_end(pdev, 0);
+	res[0].start = pci_resource_start(pci, 0);
+	res[0].end = pci_resource_end(pci, 0);
 	res[0].name = PCI_DRIVER_NAME;
 	res[0].flags = IORESOURCE_MEM;
-	res[1].start = pci_irq_vector(pdev, 0);
+	res[1].start = pci_irq_vector(pci, 0);
 	res[1].name = PCI_DRIVER_NAME;
 	res[1].flags = IORESOURCE_IRQ;
 
-	dev_info(&pdev->dev, "EMAC physical base addr: %pa\n",
+	dev_info(&pci->dev, "EMAC physical base addr: %pa\n",
 		 &res[0].start);
 
 	/* set up macb platform data */
 	memset(&plat_data, 0, sizeof(plat_data));
 
 	/* initialize clocks */
-	plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0,
+	plat_data.pclk = clk_register_fixed_rate(&pci->dev, "pclk", NULL, 0,
 						 GEM_PCLK_RATE);
 	if (IS_ERR(plat_data.pclk)) {
 		err = PTR_ERR(plat_data.pclk);
 		goto err_pclk_register;
 	}
 
-	plat_data.hclk = clk_register_fixed_rate(&pdev->dev, "hclk", NULL, 0,
+	plat_data.hclk = clk_register_fixed_rate(&pci->dev, "hclk", NULL, 0,
 						 GEM_HCLK_RATE);
 	if (IS_ERR(plat_data.hclk)) {
 		err = PTR_ERR(plat_data.hclk);
@@ -74,24 +74,24 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* set up platform device info */
 	memset(&plat_info, 0, sizeof(plat_info));
-	plat_info.parent = &pdev->dev;
-	plat_info.fwnode = pdev->dev.fwnode;
+	plat_info.parent = &pci->dev;
+	plat_info.fwnode = pci->dev.fwnode;
 	plat_info.name = PLAT_DRIVER_NAME;
-	plat_info.id = pdev->devfn;
+	plat_info.id = pci->devfn;
 	plat_info.res = res;
 	plat_info.num_res = ARRAY_SIZE(res);
 	plat_info.data = &plat_data;
 	plat_info.size_data = sizeof(plat_data);
-	plat_info.dma_mask = pdev->dma_mask;
+	plat_info.dma_mask = pci->dma_mask;
 
 	/* register platform device */
-	plat_dev = platform_device_register_full(&plat_info);
-	if (IS_ERR(plat_dev)) {
-		err = PTR_ERR(plat_dev);
+	pdev = platform_device_register_full(&plat_info);
+	if (IS_ERR(pdev)) {
+		err = PTR_ERR(pdev);
 		goto err_plat_dev_register;
 	}
 
-	pci_set_drvdata(pdev, plat_dev);
+	pci_set_drvdata(pci, pdev);
 
 	return 0;
 
@@ -105,14 +105,14 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return err;
 }
 
-static void macb_remove(struct pci_dev *pdev)
+static void macb_remove(struct pci_dev *pci)
 {
-	struct platform_device *plat_dev = pci_get_drvdata(pdev);
-	struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev);
+	struct platform_device *pdev = pci_get_drvdata(pci);
+	struct macb_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 	struct clk *pclk = plat_data->pclk;
 	struct clk *hclk = plat_data->hclk;
 
-	platform_device_unregister(plat_dev);
+	platform_device_unregister(pdev);
 	clk_unregister_fixed_rate(pclk);
 	clk_unregister_fixed_rate(hclk);
 }
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index d91f7b1aa39c..e5195d7dac1d 100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -324,9 +324,9 @@ void gem_ptp_txstamp(struct macb *bp, struct sk_buff *skb,
 	skb_tstamp_tx(skb, &shhwtstamps);
 }
 
-void gem_ptp_init(struct net_device *dev)
+void gem_ptp_init(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	bp->ptp_clock_info = gem_ptp_caps_template;
 
@@ -334,7 +334,7 @@ void gem_ptp_init(struct net_device *dev)
 	bp->tsu_rate = bp->ptp_info->get_tsu_rate(bp);
 	bp->ptp_clock_info.max_adj = bp->ptp_info->get_ptp_max_adj();
 	gem_ptp_init_timer(bp);
-	bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &dev->dev);
+	bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &netdev->dev);
 	if (IS_ERR(bp->ptp_clock)) {
 		pr_err("ptp clock register failed: %ld\n",
 			PTR_ERR(bp->ptp_clock));
@@ -353,9 +353,9 @@ void gem_ptp_init(struct net_device *dev)
 		 GEM_PTP_TIMER_NAME);
 }
 
-void gem_ptp_remove(struct net_device *ndev)
+void gem_ptp_remove(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 
 	if (bp->ptp_clock) {
 		ptp_clock_unregister(bp->ptp_clock);
@@ -378,10 +378,10 @@ static int gem_ptp_set_ts_mode(struct macb *bp,
 	return 0;
 }
 
-int gem_get_hwtst(struct net_device *dev,
+int gem_get_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	*tstamp_config = bp->tstamp_config;
 	if (!macb_dma_ptp(bp))
@@ -402,13 +402,13 @@ static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable)
 		macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE));
 }
 
-int gem_set_hwtst(struct net_device *dev,
+int gem_set_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config,
 		  struct netlink_ext_ack *extack)
 {
 	enum macb_bd_control tx_bd_control = TSTAMP_DISABLED;
 	enum macb_bd_control rx_bd_control = TSTAMP_DISABLED;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	u32 regval;
 
 	if (!macb_dma_ptp(bp))

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 02/14] net: macb: unify `struct macb *` naming convention
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

For historical reasons, MACB has both:

   struct macb *bp;
   struct macb *lp; // used in at91ether functions

Use only the former.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 176 ++++++++++++++++---------------
 1 file changed, 91 insertions(+), 85 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 896d481e0f95..a8a7df615d25 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4938,71 +4938,72 @@ static const struct macb_usrio_config at91_default_usrio = {
 
 static struct sifive_fu540_macb_mgmt *mgmt;
 
-static int at91ether_alloc_coherent(struct macb *lp)
+static int at91ether_alloc_coherent(struct macb *bp)
 {
-	struct macb_queue *q = &lp->queues[0];
+	struct macb_queue *queue = &bp->queues[0];
 
-	q->rx_ring = dma_alloc_coherent(&lp->pdev->dev,
-					 (AT91ETHER_MAX_RX_DESCR *
-					  macb_dma_desc_get_size(lp)),
-					 &q->rx_ring_dma, GFP_KERNEL);
-	if (!q->rx_ring)
+	queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev,
+					    (AT91ETHER_MAX_RX_DESCR *
+					     macb_dma_desc_get_size(bp)),
+					    &queue->rx_ring_dma, GFP_KERNEL);
+	if (!queue->rx_ring)
 		return -ENOMEM;
 
-	q->rx_buffers = dma_alloc_coherent(&lp->pdev->dev,
-					    AT91ETHER_MAX_RX_DESCR *
-					    AT91ETHER_MAX_RBUFF_SZ,
-					    &q->rx_buffers_dma, GFP_KERNEL);
-	if (!q->rx_buffers) {
-		dma_free_coherent(&lp->pdev->dev,
+	queue->rx_buffers = dma_alloc_coherent(&bp->pdev->dev,
+					       AT91ETHER_MAX_RX_DESCR *
+					       AT91ETHER_MAX_RBUFF_SZ,
+					       &queue->rx_buffers_dma,
+					       GFP_KERNEL);
+	if (!queue->rx_buffers) {
+		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
-				  macb_dma_desc_get_size(lp),
-				  q->rx_ring, q->rx_ring_dma);
-		q->rx_ring = NULL;
+				  macb_dma_desc_get_size(bp),
+				  queue->rx_ring, queue->rx_ring_dma);
+		queue->rx_ring = NULL;
 		return -ENOMEM;
 	}
 
 	return 0;
 }
 
-static void at91ether_free_coherent(struct macb *lp)
+static void at91ether_free_coherent(struct macb *bp)
 {
-	struct macb_queue *q = &lp->queues[0];
+	struct macb_queue *queue = &bp->queues[0];
 
-	if (q->rx_ring) {
-		dma_free_coherent(&lp->pdev->dev,
+	if (queue->rx_ring) {
+		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
-				  macb_dma_desc_get_size(lp),
-				  q->rx_ring, q->rx_ring_dma);
-		q->rx_ring = NULL;
+				  macb_dma_desc_get_size(bp),
+				  queue->rx_ring, queue->rx_ring_dma);
+		queue->rx_ring = NULL;
 	}
 
-	if (q->rx_buffers) {
-		dma_free_coherent(&lp->pdev->dev,
+	if (queue->rx_buffers) {
+		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
 				  AT91ETHER_MAX_RBUFF_SZ,
-				  q->rx_buffers, q->rx_buffers_dma);
-		q->rx_buffers = NULL;
+				  queue->rx_buffers, queue->rx_buffers_dma);
+		queue->rx_buffers = NULL;
 	}
 }
 
 /* Initialize and start the Receiver and Transmit subsystems */
-static int at91ether_start(struct macb *lp)
+static int at91ether_start(struct macb *bp)
 {
-	struct macb_queue *q = &lp->queues[0];
+	struct macb_queue *queue = &bp->queues[0];
 	struct macb_dma_desc *desc;
 	dma_addr_t addr;
 	u32 ctl;
 	int i, ret;
 
-	ret = at91ether_alloc_coherent(lp);
+	ret = at91ether_alloc_coherent(bp);
 	if (ret)
 		return ret;
 
-	addr = q->rx_buffers_dma;
+	addr = queue->rx_buffers_dma;
 	for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) {
-		desc = macb_rx_desc(q, i);
-		macb_set_addr(lp, desc, addr);
+		desc = macb_rx_desc(queue, i);
+		macb_set_addr(bp, desc, addr);
 		desc->ctrl = 0;
 		addr += AT91ETHER_MAX_RBUFF_SZ;
 	}
@@ -5011,17 +5012,17 @@ static int at91ether_start(struct macb *lp)
 	desc->addr |= MACB_BIT(RX_WRAP);
 
 	/* Reset buffer index */
-	q->rx_tail = 0;
+	queue->rx_tail = 0;
 
 	/* Program address of descriptor list in Rx Buffer Queue register */
-	macb_writel(lp, RBQP, q->rx_ring_dma);
+	macb_writel(bp, RBQP, queue->rx_ring_dma);
 
 	/* Enable Receive and Transmit */
-	ctl = macb_readl(lp, NCR);
-	macb_writel(lp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE));
+	ctl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE));
 
 	/* Enable MAC interrupts */
-	macb_writel(lp, IER, MACB_BIT(RCOMP)	|
+	macb_writel(bp, IER, MACB_BIT(RCOMP)	|
 			     MACB_BIT(RXUBR)	|
 			     MACB_BIT(ISR_TUND)	|
 			     MACB_BIT(ISR_RLE)	|
@@ -5032,12 +5033,12 @@ static int at91ether_start(struct macb *lp)
 	return 0;
 }
 
-static void at91ether_stop(struct macb *lp)
+static void at91ether_stop(struct macb *bp)
 {
 	u32 ctl;
 
 	/* Disable MAC interrupts */
-	macb_writel(lp, IDR, MACB_BIT(RCOMP)	|
+	macb_writel(bp, IDR, MACB_BIT(RCOMP)	|
 			     MACB_BIT(RXUBR)	|
 			     MACB_BIT(ISR_TUND)	|
 			     MACB_BIT(ISR_RLE)	|
@@ -5046,35 +5047,35 @@ static void at91ether_stop(struct macb *lp)
 			     MACB_BIT(HRESP));
 
 	/* Disable Receiver and Transmitter */
-	ctl = macb_readl(lp, NCR);
-	macb_writel(lp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE)));
+	ctl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE)));
 
 	/* Free resources. */
-	at91ether_free_coherent(lp);
+	at91ether_free_coherent(bp);
 }
 
 /* Open the ethernet interface */
 static int at91ether_open(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
 	u32 ctl;
 	int ret;
 
-	ret = pm_runtime_resume_and_get(&lp->pdev->dev);
+	ret = pm_runtime_resume_and_get(&bp->pdev->dev);
 	if (ret < 0)
 		return ret;
 
 	/* Clear internal statistics */
-	ctl = macb_readl(lp, NCR);
-	macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT));
+	ctl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctl | MACB_BIT(CLRSTAT));
 
-	macb_set_hwaddr(lp);
+	macb_set_hwaddr(bp);
 
-	ret = at91ether_start(lp);
+	ret = at91ether_start(bp);
 	if (ret)
 		goto pm_exit;
 
-	ret = macb_phylink_connect(lp);
+	ret = macb_phylink_connect(bp);
 	if (ret)
 		goto stop;
 
@@ -5083,25 +5084,25 @@ static int at91ether_open(struct net_device *netdev)
 	return 0;
 
 stop:
-	at91ether_stop(lp);
+	at91ether_stop(bp);
 pm_exit:
-	pm_runtime_put_sync(&lp->pdev->dev);
+	pm_runtime_put_sync(&bp->pdev->dev);
 	return ret;
 }
 
 /* Close the interface */
 static int at91ether_close(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
 
 	netif_stop_queue(netdev);
 
-	phylink_stop(lp->phylink);
-	phylink_disconnect_phy(lp->phylink);
+	phylink_stop(bp->phylink);
+	phylink_disconnect_phy(bp->phylink);
 
-	at91ether_stop(lp);
+	at91ether_stop(bp);
 
-	pm_runtime_put(&lp->pdev->dev);
+	pm_runtime_put(&bp->pdev->dev);
 
 	return 0;
 }
@@ -5110,19 +5111,21 @@ static int at91ether_close(struct net_device *netdev)
 static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 					struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
+	struct device *dev = &bp->pdev->dev;
 
-	if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) {
+	if (macb_readl(bp, TSR) & MACB_BIT(RM9200_BNQ)) {
 		int desc = 0;
 
 		netif_stop_queue(netdev);
 
 		/* Store packet information (to free when Tx completed) */
-		lp->rm9200_txq[desc].skb = skb;
-		lp->rm9200_txq[desc].size = skb->len;
-		lp->rm9200_txq[desc].mapping = dma_map_single(&lp->pdev->dev, skb->data,
-							      skb->len, DMA_TO_DEVICE);
-		if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) {
+		bp->rm9200_txq[desc].skb = skb;
+		bp->rm9200_txq[desc].size = skb->len;
+		bp->rm9200_txq[desc].mapping = dma_map_single(dev, skb->data,
+							      skb->len,
+							      DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, bp->rm9200_txq[desc].mapping)) {
 			dev_kfree_skb_any(skb);
 			netdev->stats.tx_dropped++;
 			netdev_err(netdev, "%s: DMA mapping error\n", __func__);
@@ -5130,9 +5133,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 		}
 
 		/* Set address of the data in the Transmit Address register */
-		macb_writel(lp, TAR, lp->rm9200_txq[desc].mapping);
+		macb_writel(bp, TAR, bp->rm9200_txq[desc].mapping);
 		/* Set length of the packet in the Transmit Control register */
-		macb_writel(lp, TCR, skb->len);
+		macb_writel(bp, TCR, skb->len);
 
 	} else {
 		netdev_err(netdev, "%s called, but device is busy!\n",
@@ -5148,16 +5151,17 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
  */
 static void at91ether_rx(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
-	struct macb_queue *q = &lp->queues[0];
+	struct macb *bp = netdev_priv(netdev);
+	struct macb_queue *queue = &bp->queues[0];
 	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
 	struct sk_buff *skb;
 	unsigned int pktlen;
 
-	desc = macb_rx_desc(q, q->rx_tail);
+	desc = macb_rx_desc(queue, queue->rx_tail);
 	while (desc->addr & MACB_BIT(RX_USED)) {
-		p_recv = q->rx_buffers + q->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
+		p_recv = queue->rx_buffers +
+			 queue->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
 		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
 		skb = netdev_alloc_skb(netdev, pktlen + 2);
 		if (skb) {
@@ -5179,12 +5183,12 @@ static void at91ether_rx(struct net_device *netdev)
 		desc->addr &= ~MACB_BIT(RX_USED);
 
 		/* wrap after last buffer */
-		if (q->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
-			q->rx_tail = 0;
+		if (queue->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
+			queue->rx_tail = 0;
 		else
-			q->rx_tail++;
+			queue->rx_tail++;
 
-		desc = macb_rx_desc(q, q->rx_tail);
+		desc = macb_rx_desc(queue, queue->rx_tail);
 	}
 }
 
@@ -5192,14 +5196,14 @@ static void at91ether_rx(struct net_device *netdev)
 static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 {
 	struct net_device *netdev = dev_id;
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
 	u32 intstatus, ctl;
 	unsigned int desc;
 
 	/* MAC Interrupt Status register indicates what interrupts are pending.
 	 * It is automatically cleared once read.
 	 */
-	intstatus = macb_readl(lp, ISR);
+	intstatus = macb_readl(bp, ISR);
 
 	/* Receive complete */
 	if (intstatus & MACB_BIT(RCOMP))
@@ -5212,23 +5216,25 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 			netdev->stats.tx_errors++;
 
 		desc = 0;
-		if (lp->rm9200_txq[desc].skb) {
-			dev_consume_skb_irq(lp->rm9200_txq[desc].skb);
-			lp->rm9200_txq[desc].skb = NULL;
-			dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping,
-					 lp->rm9200_txq[desc].size, DMA_TO_DEVICE);
+		if (bp->rm9200_txq[desc].skb) {
+			dev_consume_skb_irq(bp->rm9200_txq[desc].skb);
+			bp->rm9200_txq[desc].skb = NULL;
+			dma_unmap_single(&bp->pdev->dev,
+					 bp->rm9200_txq[desc].mapping,
+					 bp->rm9200_txq[desc].size,
+					 DMA_TO_DEVICE);
 			netdev->stats.tx_packets++;
-			netdev->stats.tx_bytes += lp->rm9200_txq[desc].size;
+			netdev->stats.tx_bytes += bp->rm9200_txq[desc].size;
 		}
 		netif_wake_queue(netdev);
 	}
 
 	/* Work-around for EMAC Errata section 41.3.1 */
 	if (intstatus & MACB_BIT(RXUBR)) {
-		ctl = macb_readl(lp, NCR);
-		macb_writel(lp, NCR, ctl & ~MACB_BIT(RE));
+		ctl = macb_readl(bp, NCR);
+		macb_writel(bp, NCR, ctl & ~MACB_BIT(RE));
 		wmb();
-		macb_writel(lp, NCR, ctl | MACB_BIT(RE));
+		macb_writel(bp, NCR, ctl | MACB_BIT(RE));
 	}
 
 	if (intstatus & MACB_BIT(ISR_ROVR))

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 03/14] net: macb: unify queue index variable naming convention and types
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Variables are named q or queue_index. Types are int, unsigned int, u32
and u16. Use `unsigned int q` everywhere.

Skip over taprio functions. They use `u8 queue_id` which fits with the
`struct macb_queue_enst_config` field. Using `queue_id` everywhere
would be too verbose.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index a8a7df615d25..b0e70f6ce305 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -877,7 +877,7 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 static void gem_shuffle_tx_rings(struct macb *bp)
 {
 	struct macb_queue *queue;
-	int q;
+	unsigned int q;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; q++, queue++)
 		gem_shuffle_tx_one_ring(queue);
@@ -1258,7 +1258,7 @@ static void macb_tx_error_task(struct work_struct *work)
 						      tx_error_task);
 	bool			halt_timeout = false;
 	struct macb		*bp = queue->bp;
-	u32			queue_index;
+	unsigned int		q;
 	u32			packets = 0;
 	u32			bytes = 0;
 	struct macb_tx_skb	*tx_skb;
@@ -1267,9 +1267,9 @@ static void macb_tx_error_task(struct work_struct *work)
 	unsigned int		tail;
 	unsigned long		flags;
 
-	queue_index = queue - bp->queues;
+	q = queue - bp->queues;
 	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
-		    queue_index, queue->tx_tail, queue->tx_head);
+		    q, queue->tx_tail, queue->tx_head);
 
 	/* Prevent the queue NAPI TX poll from running, as it calls
 	 * macb_tx_complete(), which in turn may call netif_wake_subqueue().
@@ -1342,7 +1342,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		macb_tx_unmap(bp, tx_skb, 0);
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, q),
 				  packets, bytes);
 
 	/* Set end of TX queue */
@@ -1407,7 +1407,7 @@ static bool ptp_one_step_sync(struct sk_buff *skb)
 static int macb_tx_complete(struct macb_queue *queue, int budget)
 {
 	struct macb *bp = queue->bp;
-	u16 queue_index = queue - bp->queues;
+	unsigned int q = queue - bp->queues;
 	unsigned long flags;
 	unsigned int tail;
 	unsigned int head;
@@ -1469,14 +1469,14 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 		}
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, q),
 				  packets, bytes);
 
 	queue->tx_tail = tail;
-	if (__netif_subqueue_stopped(bp->netdev, queue_index) &&
+	if (__netif_subqueue_stopped(bp->netdev, q) &&
 	    CIRC_CNT(queue->tx_head, queue->tx_tail,
 		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
-		netif_wake_subqueue(bp->netdev, queue_index);
+		netif_wake_subqueue(bp->netdev, q);
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
 
 	if (packets)
@@ -2470,10 +2470,10 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *netdev)
 static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 				   struct net_device *netdev)
 {
-	u16 queue_index = skb_get_queue_mapping(skb);
 	struct macb *bp = netdev_priv(netdev);
-	struct macb_queue *queue = &bp->queues[queue_index];
+	unsigned int q = skb_get_queue_mapping(skb);
 	unsigned int desc_cnt, nr_frags, frag_size, f;
+	struct macb_queue *queue = &bp->queues[q];
 	unsigned int hdrlen;
 	unsigned long flags;
 	bool is_lso;
@@ -2513,7 +2513,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
 	netdev_vdbg(bp->netdev,
 		    "start_xmit: queue %hu len %u head %p data %p tail %p end %p\n",
-		    queue_index, skb->len, skb->head, skb->data,
+		    q, skb->len, skb->head, skb->data,
 		    skb_tail_pointer(skb), skb_end_pointer(skb));
 	print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1,
 		       skb->data, 16, true);
@@ -2539,7 +2539,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	/* This is a hard error, log it. */
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
 		       bp->tx_ring_size) < desc_cnt) {
-		netif_stop_subqueue(netdev, queue_index);
+		netif_stop_subqueue(netdev, q);
 		netdev_dbg(netdev, "tx_head = %u, tx_tail = %u\n",
 			   queue->tx_head, queue->tx_tail);
 		ret = NETDEV_TX_BUSY;
@@ -2555,7 +2555,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	/* Make newly initialized descriptor visible to hardware */
 	wmb();
 	skb_tx_timestamp(skb);
-	netdev_tx_sent_queue(netdev_get_tx_queue(bp->netdev, queue_index),
+	netdev_tx_sent_queue(netdev_get_tx_queue(bp->netdev, q),
 			     skb->len);
 
 	spin_lock(&bp->lock);
@@ -2564,7 +2564,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	spin_unlock(&bp->lock);
 
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
-		netif_stop_subqueue(netdev, queue_index);
+		netif_stop_subqueue(netdev, q);
 
 unlock:
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 04/14] net: macb: enforce reverse christmas tree (RCT) convention
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Enforce the reverse christmas tree convention in the following functions:

   macb_tx_error_task()
   gem_rx_refill()
   gem_rx()
   macb_rx_frame()
   macb_init_rx_ring()
   macb_rx()
   macb_rx_pending()
   macb_start_xmit()

The goal is to minimise unrelated diff in future patches.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 61 ++++++++++++++++----------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index b0e70f6ce305..c5d8e8f835ba 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1254,20 +1254,19 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc)
 
 static void macb_tx_error_task(struct work_struct *work)
 {
-	struct macb_queue	*queue = container_of(work, struct macb_queue,
-						      tx_error_task);
-	bool			halt_timeout = false;
-	struct macb		*bp = queue->bp;
-	unsigned int		q;
-	u32			packets = 0;
-	u32			bytes = 0;
-	struct macb_tx_skb	*tx_skb;
-	struct macb_dma_desc	*desc;
-	struct sk_buff		*skb;
-	unsigned int		tail;
-	unsigned long		flags;
+	struct macb_queue *queue = container_of(work, struct macb_queue,
+						tx_error_task);
+	unsigned int q = queue - queue->bp->queues;
+	struct macb *bp = queue->bp;
+	struct macb_tx_skb *tx_skb;
+	struct macb_dma_desc *desc;
+	bool halt_timeout = false;
+	struct sk_buff *skb;
+	unsigned long flags;
+	unsigned int tail;
+	u32 packets = 0;
+	u32 bytes = 0;
 
-	q = queue - bp->queues;
 	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
 		    q, queue->tx_tail, queue->tx_head);
 
@@ -1487,11 +1486,11 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 
 static void gem_rx_refill(struct macb_queue *queue)
 {
-	unsigned int		entry;
-	struct sk_buff		*skb;
-	dma_addr_t		paddr;
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
+	struct sk_buff *skb;
+	unsigned int entry;
+	dma_addr_t paddr;
 
 	while (CIRC_SPACE(queue->rx_prepared_head, queue->rx_tail,
 			bp->rx_ring_size) > 0) {
@@ -1584,11 +1583,11 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		  int budget)
 {
 	struct macb *bp = queue->bp;
-	unsigned int		len;
-	unsigned int		entry;
-	struct sk_buff		*skb;
-	struct macb_dma_desc	*desc;
-	int			count = 0;
+	struct macb_dma_desc *desc;
+	struct sk_buff *skb;
+	unsigned int entry;
+	unsigned int len;
+	int count = 0;
 
 	while (count < budget) {
 		u32 ctrl;
@@ -1674,12 +1673,12 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 			 unsigned int first_frag, unsigned int last_frag)
 {
-	unsigned int len;
-	unsigned int frag;
+	struct macb *bp = queue->bp;
+	struct macb_dma_desc *desc;
 	unsigned int offset;
 	struct sk_buff *skb;
-	struct macb_dma_desc *desc;
-	struct macb *bp = queue->bp;
+	unsigned int frag;
+	unsigned int len;
 
 	desc = macb_rx_desc(queue, last_frag);
 	len = desc->ctrl & bp->rx_frm_len_mask;
@@ -1755,9 +1754,9 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 
 static inline void macb_init_rx_ring(struct macb_queue *queue)
 {
+	struct macb_dma_desc *desc = NULL;
 	struct macb *bp = queue->bp;
 	dma_addr_t addr;
-	struct macb_dma_desc *desc = NULL;
 	int i;
 
 	addr = queue->rx_buffers_dma;
@@ -1776,9 +1775,9 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 {
 	struct macb *bp = queue->bp;
 	bool reset_rx_queue = false;
-	int received = 0;
-	unsigned int tail;
 	int first_frag = -1;
+	unsigned int tail;
+	int received = 0;
 
 	for (tail = queue->rx_tail; budget > 0; tail++) {
 		struct macb_dma_desc *desc = macb_rx_desc(queue, tail);
@@ -1853,8 +1852,8 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 static bool macb_rx_pending(struct macb_queue *queue)
 {
 	struct macb *bp = queue->bp;
-	unsigned int		entry;
-	struct macb_dma_desc	*desc;
+	struct macb_dma_desc *desc;
+	unsigned int entry;
 
 	entry = macb_rx_ring_wrap(bp, queue->rx_tail);
 	desc = macb_rx_desc(queue, entry);
@@ -2474,10 +2473,10 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	unsigned int q = skb_get_queue_mapping(skb);
 	unsigned int desc_cnt, nr_frags, frag_size, f;
 	struct macb_queue *queue = &bp->queues[q];
+	netdev_tx_t ret = NETDEV_TX_OK;
 	unsigned int hdrlen;
 	unsigned long flags;
 	bool is_lso;
-	netdev_tx_t ret = NETDEV_TX_OK;
 
 	if (macb_clear_csum(skb)) {
 		dev_kfree_skb_any(skb);

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 05/14] net: macb: allocate tieoff descriptor once across device lifetime
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

The tieoff descriptor is an RX DMA descriptor ring of size one. It gets
configured onto queues for Wake-on-LAN during system-wide suspend when
the hardware does not support disabling individual queues
(MACB_CAPS_QUEUE_DISABLE).

The MACB/GEM driver allocates it alongside the main RX ring
inside macb_alloc_consistent() at open. It is freed by
macb_free_consistent() at close.

Change this to allocate it once at probe and free it on probe failure or
device removal. This makes the tieoff descriptor's lifetime much longer
and avoids repeating the coherent buffer allocation on each open/close
cycle.

Main benefit: we dissociate its lifetime from the main ring's lifetime.
That way there is less work to do when resources are (re)allocated. This
currently happens on close/open, but will soon also happen on context
swap operations (set_ringparam, change_mtu, set_channels, etc).

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 75 +++++++++++++++++---------------
 1 file changed, 41 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index c5d8e8f835ba..ec030801ed68 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2653,12 +2653,6 @@ static void macb_free_consistent(struct macb *bp)
 	unsigned int q;
 	size_t size;
 
-	if (bp->rx_ring_tieoff) {
-		dma_free_coherent(dev, macb_dma_desc_get_size(bp),
-				  bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma);
-		bp->rx_ring_tieoff = NULL;
-	}
-
 	bp->macbgem_ops.mog_free_rx_buffers(bp);
 
 	size = bp->num_queues * macb_tx_ring_size_per_queue(bp);
@@ -2756,16 +2750,6 @@ static int macb_alloc_consistent(struct macb *bp)
 	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
 		goto out_err;
 
-	/* Required for tie off descriptor for PM cases */
-	if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) {
-		bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
-							macb_dma_desc_get_size(bp),
-							&bp->rx_ring_tieoff_dma,
-							GFP_KERNEL);
-		if (!bp->rx_ring_tieoff)
-			goto out_err;
-	}
-
 	return 0;
 
 out_err:
@@ -2773,19 +2757,6 @@ static int macb_alloc_consistent(struct macb *bp)
 	return -ENOMEM;
 }
 
-static void macb_init_tieoff(struct macb *bp)
-{
-	struct macb_dma_desc *desc = bp->rx_ring_tieoff;
-
-	if (bp->caps & MACB_CAPS_QUEUE_DISABLE)
-		return;
-	/* Setup a wrapping descriptor with no free slots
-	 * (WRAP and USED) to tie off/disable unused RX queues.
-	 */
-	macb_set_addr(bp, desc, MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
-	desc->ctrl = 0;
-}
-
 static void gem_init_rx_ring(struct macb_queue *queue)
 {
 	queue->rx_tail = 0;
@@ -2813,8 +2784,6 @@ static void gem_init_rings(struct macb *bp)
 
 		gem_init_rx_ring(queue);
 	}
-
-	macb_init_tieoff(bp);
 }
 
 static void macb_init_rings(struct macb *bp)
@@ -2832,8 +2801,6 @@ static void macb_init_rings(struct macb *bp)
 	bp->queues[0].tx_head = 0;
 	bp->queues[0].tx_tail = 0;
 	desc->ctrl |= MACB_BIT(TX_WRAP);
-
-	macb_init_tieoff(bp);
 }
 
 static void macb_reset_hw(struct macb *bp)
@@ -5510,6 +5477,38 @@ static int eyeq5_init(struct platform_device *pdev)
 	return ret;
 }
 
+static int macb_alloc_tieoff(struct macb *bp)
+{
+	/* Tieoff is a workaround in case HW cannot disable queues, for PM. */
+	if (bp->caps & MACB_CAPS_QUEUE_DISABLE)
+		return 0;
+
+	bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
+						macb_dma_desc_get_size(bp),
+						&bp->rx_ring_tieoff_dma,
+						GFP_KERNEL);
+	if (!bp->rx_ring_tieoff)
+		return -ENOMEM;
+
+	macb_set_addr(bp, bp->rx_ring_tieoff,
+		      MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
+
+	bp->rx_ring_tieoff->ctrl = 0;
+
+	return 0;
+}
+
+static void macb_free_tieoff(struct macb *bp)
+{
+	if (!bp->rx_ring_tieoff)
+		return;
+
+	dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp),
+			  bp->rx_ring_tieoff,
+			  bp->rx_ring_tieoff_dma);
+	bp->rx_ring_tieoff = NULL;
+}
+
 static const struct macb_usrio_config mpfs_usrio = {
 	.tsu_source = 0,
 };
@@ -5919,10 +5918,14 @@ static int macb_probe(struct platform_device *pdev)
 
 	netif_carrier_off(netdev);
 
+	err = macb_alloc_tieoff(bp);
+	if (err)
+		goto err_out_unregister_mdio;
+
 	err = register_netdev(netdev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-		goto err_out_unregister_mdio;
+		goto err_out_free_tieoff;
 	}
 
 	INIT_WORK(&bp->hresp_err_bh_work, macb_hresp_error_task);
@@ -5936,6 +5939,9 @@ static int macb_probe(struct platform_device *pdev)
 
 	return 0;
 
+err_out_free_tieoff:
+	macb_free_tieoff(bp);
+
 err_out_unregister_mdio:
 	mdiobus_unregister(bp->mii_bus);
 	mdiobus_free(bp->mii_bus);
@@ -5965,6 +5971,7 @@ static void macb_remove(struct platform_device *pdev)
 	if (netdev) {
 		bp = netdev_priv(netdev);
 		unregister_netdev(netdev);
+		macb_free_tieoff(bp);
 		phy_exit(bp->phy);
 		mdiobus_unregister(bp->mii_bus);
 		mdiobus_free(bp->mii_bus);

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 06/14] net: macb: introduce macb_context struct for buffer management
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Whenever an operation requires buffer realloc, we close the interface,
update parameters and reopen. To improve reliability under memory
pressure, we should rather alloc new buffers, reconfigure HW and free
old buffers. This requires MACB to support having multiple "contexts"
in parallel.

Introduce this concept by adding the macb_context struct, which owns all
queue buffers and the parameters associated. We do not yet support
multiple contexts in parallel, because all functions access bp->ctx
(the currently active context) directly.

Steps:

 - Introduce `struct macb_context` and its children `struct macb_rxq`
   and `struct macb_txq`. Context fields are stolen from `struct macb`
   and rxq/txq fields are from `struct macb_queue`.

   Making it two separate structs per queue simplifies accesses: we grab
   a txq/rxq local variable and access fields like txq->head instead of
   queue->tx_head. It also anecdotally improves data locality.

 - macb_init_dflt() / macb_get_ringparam() do not access
   bp->ctx->{rx,tx}_ring_size as they will/might run while the interface
   is offline and ctx is NULL. Instead, introduce
   bp->configured_{rx,tx}_ring_size, which get updated on user requests.

 - macb_open() starts by allocating bp->ctx. It gets freed in the
   open error codepath or by macb_close().

 - Guided by compile errors, update all codepaths. Most diff is changing
   `queue->tx_*` to `txq->*` and `queue->rx_*` to `rxq->*`, with a new
   local variable. Also rx_buffer_size / rx_ring_size / tx_ring_size
   move from bp to bp->ctx.

   Introduce two helper functions, macb_txq() and macb_rxq(), to convert
   a macb_queue pointer into the matching txq/rxq of the active context.

 - macb_get_regs() is tweaked to support being run while the interface
   is offline (and context is NULL). Default the values to zero and
   override them only if a context is present.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |  49 +++-
 drivers/net/ethernet/cadence/macb_main.c | 454 ++++++++++++++++++-------------
 2 files changed, 305 insertions(+), 198 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 9857df5b57f0..452b2c8f8641 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1272,21 +1272,10 @@ struct macb_queue {
 
 	/* Lock to protect tx_head and tx_tail */
 	spinlock_t		tx_ptr_lock;
-	unsigned int		tx_head, tx_tail;
-	struct macb_dma_desc	*tx_ring;
-	struct macb_tx_skb	*tx_skb;
-	dma_addr_t		tx_ring_dma;
 	struct work_struct	tx_error_task;
 	bool			txubr_pending;
 	struct napi_struct	napi_tx;
 
-	dma_addr_t		rx_ring_dma;
-	dma_addr_t		rx_buffers_dma;
-	unsigned int		rx_tail;
-	unsigned int		rx_prepared_head;
-	struct macb_dma_desc	*rx_ring;
-	struct sk_buff		**rx_skbuff;
-	void			*rx_buffers;
 	struct napi_struct	napi_rx;
 	struct queue_stats stats;
 };
@@ -1301,6 +1290,32 @@ struct ethtool_rx_fs_list {
 	unsigned int count;
 };
 
+struct macb_rxq {
+	struct macb_dma_desc	*ring;		/* MACB & GEM */
+	dma_addr_t		ring_dma;	/* MACB & GEM */
+	unsigned int		tail;		/* MACB & GEM */
+	unsigned int		prepared_head;	/* GEM */
+	struct sk_buff		**skbuff;	/* GEM */
+	dma_addr_t		buffers_dma;	/* MACB */
+	void			*buffers;	/* MACB */
+};
+
+struct macb_txq {
+	unsigned int		head;
+	unsigned int		tail;
+	struct macb_dma_desc	*ring;
+	dma_addr_t		ring_dma;
+	struct macb_tx_skb	*skb;
+};
+
+struct macb_context {
+	unsigned int		rx_buffer_size;
+	unsigned int		rx_ring_size;
+	unsigned int		tx_ring_size;
+	struct macb_rxq		rxq[MACB_MAX_QUEUES];
+	struct macb_txq		txq[MACB_MAX_QUEUES];
+};
+
 struct macb {
 	void __iomem		*regs;
 	bool			native_io;
@@ -1309,12 +1324,16 @@ struct macb {
 	u32	(*macb_reg_readl)(struct macb *bp, int offset);
 	void	(*macb_reg_writel)(struct macb *bp, int offset, u32 value);
 
+	/*
+	 * Context stores all its parameters.
+	 * But we must remember them across closure.
+	 */
+	unsigned int		configured_rx_ring_size;
+	unsigned int		configured_tx_ring_size;
+	struct macb_context	*ctx;
+
 	struct macb_dma_desc	*rx_ring_tieoff;
 	dma_addr_t		rx_ring_tieoff_dma;
-	size_t			rx_buffer_size;
-
-	unsigned int		rx_ring_size;
-	unsigned int		tx_ring_size;
 
 	unsigned int		num_queues;
 	struct macb_queue	queues[MACB_MAX_QUEUES];
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index ec030801ed68..3e596cbe9fc8 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -61,7 +61,7 @@ struct sifive_fu540_macb_mgmt {
 #define MAX_TX_RING_SIZE	4096
 
 /* level of occupied TX descriptors under which we wake up TX process */
-#define MACB_TX_WAKEUP_THRESH(bp)	(3 * (bp)->tx_ring_size / 4)
+#define MACB_TX_WAKEUP_THRESH(bp)	(3 * (bp)->ctx->tx_ring_size / 4)
 
 #define MACB_RX_INT_FLAGS	(MACB_BIT(RCOMP) | MACB_BIT(ISR_ROVR))
 #define MACB_TX_ERR_FLAGS	(MACB_BIT(ISR_TUND)			\
@@ -152,48 +152,73 @@ static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_d
 /* Ring buffer accessors */
 static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index)
 {
-	return index & (bp->tx_ring_size - 1);
+	return index & (bp->ctx->tx_ring_size - 1);
+}
+
+static struct macb_txq *macb_txq(struct macb_queue *queue)
+{
+	struct macb *bp = queue->bp;
+	unsigned int q = queue - bp->queues;
+
+	return &bp->ctx->txq[q];
+}
+
+static struct macb_rxq *macb_rxq(struct macb_queue *queue)
+{
+	struct macb *bp = queue->bp;
+	unsigned int q = queue - bp->queues;
+
+	return &bp->ctx->rxq[q];
 }
 
 static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue,
 					  unsigned int index)
 {
+	struct macb_txq *txq = macb_txq(queue);
+
 	index = macb_tx_ring_wrap(queue->bp, index);
 	index = macb_adj_dma_desc_idx(queue->bp, index);
-	return &queue->tx_ring[index];
+	return &txq->ring[index];
 }
 
 static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue,
 				       unsigned int index)
 {
-	return &queue->tx_skb[macb_tx_ring_wrap(queue->bp, index)];
+	struct macb_txq *txq = macb_txq(queue);
+
+	return &txq->skb[macb_tx_ring_wrap(queue->bp, index)];
 }
 
 static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index)
 {
+	struct macb_txq *txq = macb_txq(queue);
 	dma_addr_t offset;
 
 	offset = macb_tx_ring_wrap(queue->bp, index) *
 			macb_dma_desc_get_size(queue->bp);
 
-	return queue->tx_ring_dma + offset;
+	return txq->ring_dma + offset;
 }
 
 static unsigned int macb_rx_ring_wrap(struct macb *bp, unsigned int index)
 {
-	return index & (bp->rx_ring_size - 1);
+	return index & (bp->ctx->rx_ring_size - 1);
 }
 
 static struct macb_dma_desc *macb_rx_desc(struct macb_queue *queue, unsigned int index)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
+
 	index = macb_rx_ring_wrap(queue->bp, index);
 	index = macb_adj_dma_desc_idx(queue->bp, index);
-	return &queue->rx_ring[index];
+	return &rxq->ring[index];
 }
 
 static void *macb_rx_buffer(struct macb_queue *queue, unsigned int index)
 {
-	return queue->rx_buffers + queue->bp->rx_buffer_size *
+	struct macb_rxq *rxq = macb_rxq(queue);
+
+	return rxq->buffers + queue->bp->ctx->rx_buffer_size *
 	       macb_rx_ring_wrap(queue->bp, index);
 }
 
@@ -463,19 +488,23 @@ static int macb_mdio_write_c45(struct mii_bus *bus, int mii_id,
 static void macb_init_buffers(struct macb *bp)
 {
 	struct macb_queue *queue;
+	struct macb_rxq *rxq;
+	struct macb_txq *txq;
 	unsigned int q;
 
 	/* Single register for all queues' high 32 bits. */
 	if (macb_dma64(bp)) {
-		macb_writel(bp, RBQPH,
-			    upper_32_bits(bp->queues[0].rx_ring_dma));
-		macb_writel(bp, TBQPH,
-			    upper_32_bits(bp->queues[0].tx_ring_dma));
+		rxq = &bp->ctx->rxq[0];
+		txq = &bp->ctx->txq[0];
+		macb_writel(bp, RBQPH, upper_32_bits(rxq->ring_dma));
+		macb_writel(bp, TBQPH, upper_32_bits(txq->ring_dma));
 	}
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
-		queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
+		rxq = &bp->ctx->rxq[q];
+		txq = &bp->ctx->txq[q];
+		queue_writel(queue, RBQP, lower_32_bits(rxq->ring_dma));
+		queue_writel(queue, TBQP, lower_32_bits(txq->ring_dma));
 	}
 }
 
@@ -648,11 +677,12 @@ static bool macb_tx_lpi_set(struct macb *bp, bool enable)
 
 static bool macb_tx_all_queues_idle(struct macb *bp)
 {
-	struct macb_queue *queue;
+	struct macb_txq *txq;
 	unsigned int q;
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		if (READ_ONCE(queue->tx_head) != READ_ONCE(queue->tx_tail))
+	for (q = 0; q < bp->num_queues; ++q) {
+		txq = &bp->ctx->txq[q];
+		if (READ_ONCE(txq->head) != READ_ONCE(txq->tail))
 			return false;
 	}
 	return true;
@@ -799,6 +829,7 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 	struct macb_tx_skb tx_skb, *skb_curr, *skb_next;
 	struct macb_dma_desc *desc_curr, *desc_next;
 	unsigned int i, cycles, shift, curr, next;
+	struct macb_txq *txq = macb_txq(queue);
 	struct macb *bp = queue->bp;
 	unsigned char desc[24];
 	unsigned long flags;
@@ -809,17 +840,17 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 		return;
 
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
-	head = queue->tx_head;
-	tail = queue->tx_tail;
-	ring_size = bp->tx_ring_size;
+	head = txq->head;
+	tail = txq->tail;
+	ring_size = bp->ctx->tx_ring_size;
 	count = CIRC_CNT(head, tail, ring_size);
 
 	if (!(tail % ring_size))
 		goto unlock;
 
 	if (!count) {
-		queue->tx_head = 0;
-		queue->tx_tail = 0;
+		txq->head = 0;
+		txq->tail = 0;
 		goto unlock;
 	}
 
@@ -863,8 +894,8 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 		       sizeof(struct macb_tx_skb));
 	}
 
-	queue->tx_head = count;
-	queue->tx_tail = 0;
+	txq->head = count;
+	txq->tail = 0;
 
 	/* Make descriptor updates visible to hardware */
 	wmb();
@@ -1257,6 +1288,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	struct macb_queue *queue = container_of(work, struct macb_queue,
 						tx_error_task);
 	unsigned int q = queue - queue->bp->queues;
+	struct macb_txq *txq = macb_txq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_tx_skb *tx_skb;
 	struct macb_dma_desc *desc;
@@ -1268,7 +1300,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	u32 bytes = 0;
 
 	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
-		    q, queue->tx_tail, queue->tx_head);
+		    q, txq->tail, txq->head);
 
 	/* Prevent the queue NAPI TX poll from running, as it calls
 	 * macb_tx_complete(), which in turn may call netif_wake_subqueue().
@@ -1295,7 +1327,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	/* Treat frames in TX queue including the ones that caused the error.
 	 * Free transmit buffers in upper layer.
 	 */
-	for (tail = queue->tx_tail; tail != queue->tx_head; tail++) {
+	for (tail = txq->tail; tail != txq->head; tail++) {
 		u32	ctrl;
 
 		desc = macb_tx_desc(queue, tail);
@@ -1353,10 +1385,10 @@ static void macb_tx_error_task(struct work_struct *work)
 	wmb();
 
 	/* Reinitialize the TX desc queue */
-	queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
+	queue_writel(queue, TBQP, lower_32_bits(txq->ring_dma));
 	/* Make TX ring reflect state of hardware */
-	queue->tx_head = 0;
-	queue->tx_tail = 0;
+	txq->head = 0;
+	txq->tail = 0;
 
 	/* Housework before enabling TX IRQ */
 	macb_writel(bp, TSR, macb_readl(bp, TSR));
@@ -1406,6 +1438,7 @@ static bool ptp_one_step_sync(struct sk_buff *skb)
 static int macb_tx_complete(struct macb_queue *queue, int budget)
 {
 	struct macb *bp = queue->bp;
+	struct macb_txq *txq = macb_txq(queue);
 	unsigned int q = queue - bp->queues;
 	unsigned long flags;
 	unsigned int tail;
@@ -1414,8 +1447,8 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 	u32 bytes = 0;
 
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
-	head = queue->tx_head;
-	for (tail = queue->tx_tail; tail != head && packets < budget; tail++) {
+	head = txq->head;
+	for (tail = txq->tail; tail != head && packets < budget; tail++) {
 		struct macb_tx_skb	*tx_skb;
 		struct sk_buff		*skb;
 		struct macb_dma_desc	*desc;
@@ -1471,10 +1504,10 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, q),
 				  packets, bytes);
 
-	queue->tx_tail = tail;
+	txq->tail = tail;
 	if (__netif_subqueue_stopped(bp->netdev, q) &&
-	    CIRC_CNT(queue->tx_head, queue->tx_tail,
-		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
+	    CIRC_CNT(txq->head, txq->tail,
+		     bp->ctx->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
 		netif_wake_subqueue(bp->netdev, q);
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
 
@@ -1486,24 +1519,26 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 
 static void gem_rx_refill(struct macb_queue *queue)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
 	struct sk_buff *skb;
 	unsigned int entry;
 	dma_addr_t paddr;
 
-	while (CIRC_SPACE(queue->rx_prepared_head, queue->rx_tail,
-			bp->rx_ring_size) > 0) {
-		entry = macb_rx_ring_wrap(bp, queue->rx_prepared_head);
+	while (CIRC_SPACE(rxq->prepared_head, rxq->tail,
+			  bp->ctx->rx_ring_size) > 0) {
+		entry = macb_rx_ring_wrap(bp, rxq->prepared_head);
 
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
 		desc = macb_rx_desc(queue, entry);
 
-		if (!queue->rx_skbuff[entry]) {
+		if (!rxq->skbuff[entry]) {
 			/* allocate sk_buff for this free entry in ring */
-			skb = netdev_alloc_skb(bp->netdev, bp->rx_buffer_size);
+			skb = netdev_alloc_skb(bp->netdev,
+					       bp->ctx->rx_buffer_size);
 			if (unlikely(!skb)) {
 				netdev_err(bp->netdev,
 					   "Unable to allocate sk_buff\n");
@@ -1512,16 +1547,16 @@ static void gem_rx_refill(struct macb_queue *queue)
 
 			/* now fill corresponding descriptor entry */
 			paddr = dma_map_single(&bp->pdev->dev, skb->data,
-					       bp->rx_buffer_size,
+					       bp->ctx->rx_buffer_size,
 					       DMA_FROM_DEVICE);
 			if (dma_mapping_error(&bp->pdev->dev, paddr)) {
 				dev_kfree_skb(skb);
 				break;
 			}
 
-			queue->rx_skbuff[entry] = skb;
+			rxq->skbuff[entry] = skb;
 
-			if (entry == bp->rx_ring_size - 1)
+			if (entry == bp->ctx->rx_ring_size - 1)
 				paddr |= MACB_BIT(RX_WRAP);
 			desc->ctrl = 0;
 			/* Setting addr clears RX_USED and allows reception,
@@ -1548,14 +1583,14 @@ static void gem_rx_refill(struct macb_queue *queue)
 			dma_wmb();
 			desc->addr &= ~MACB_BIT(RX_USED);
 		}
-		queue->rx_prepared_head++;
+		rxq->prepared_head++;
 	}
 
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
 	netdev_vdbg(bp->netdev, "rx ring: queue: %p, prepared head %d, tail %d\n",
-		    queue, queue->rx_prepared_head, queue->rx_tail);
+		    queue, rxq->prepared_head, rxq->tail);
 }
 
 /* Mark DMA descriptors from begin up to and not including end as unused */
@@ -1582,6 +1617,7 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin,
 static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		  int budget)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
 	struct sk_buff *skb;
@@ -1594,7 +1630,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		dma_addr_t addr;
 		bool rxused;
 
-		entry = macb_rx_ring_wrap(bp, queue->rx_tail);
+		entry = macb_rx_ring_wrap(bp, rxq->tail);
 		desc = macb_rx_desc(queue, entry);
 
 		/* Make hw descriptor updates visible to CPU */
@@ -1611,7 +1647,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 
 		ctrl = desc->ctrl;
 
-		queue->rx_tail++;
+		rxq->tail++;
 		count++;
 
 		if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) {
@@ -1621,7 +1657,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 			queue->stats.rx_dropped++;
 			break;
 		}
-		skb = queue->rx_skbuff[entry];
+		skb = rxq->skbuff[entry];
 		if (unlikely(!skb)) {
 			netdev_err(bp->netdev,
 				   "inconsistent Rx descriptor chain\n");
@@ -1630,14 +1666,14 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 			break;
 		}
 		/* now everything is ready for receiving packet */
-		queue->rx_skbuff[entry] = NULL;
+		rxq->skbuff[entry] = NULL;
 		len = ctrl & bp->rx_frm_len_mask;
 
 		netdev_vdbg(bp->netdev, "gem_rx %u (len %u)\n", entry, len);
 
 		skb_put(skb, len);
 		dma_unmap_single(&bp->pdev->dev, addr,
-				 bp->rx_buffer_size, DMA_FROM_DEVICE);
+				 bp->ctx->rx_buffer_size, DMA_FROM_DEVICE);
 
 		skb->protocol = eth_type_trans(skb, bp->netdev);
 		skb_checksum_none_assert(skb);
@@ -1717,7 +1753,7 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	skb_put(skb, len);
 
 	for (frag = first_frag; ; frag++) {
-		unsigned int frag_len = bp->rx_buffer_size;
+		unsigned int frag_len = bp->ctx->rx_buffer_size;
 
 		if (offset + frag_len > len) {
 			if (unlikely(frag != last_frag)) {
@@ -1729,7 +1765,7 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 		skb_copy_to_linear_data_offset(skb, offset,
 					       macb_rx_buffer(queue, frag),
 					       frag_len);
-		offset += bp->rx_buffer_size;
+		offset += bp->ctx->rx_buffer_size;
 		desc = macb_rx_desc(queue, frag);
 		desc->addr &= ~MACB_BIT(RX_USED);
 
@@ -1754,32 +1790,34 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 
 static inline void macb_init_rx_ring(struct macb_queue *queue)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb_dma_desc *desc = NULL;
 	struct macb *bp = queue->bp;
 	dma_addr_t addr;
 	int i;
 
-	addr = queue->rx_buffers_dma;
-	for (i = 0; i < bp->rx_ring_size; i++) {
+	addr = rxq->buffers_dma;
+	for (i = 0; i < bp->ctx->rx_ring_size; i++) {
 		desc = macb_rx_desc(queue, i);
 		macb_set_addr(bp, desc, addr);
 		desc->ctrl = 0;
-		addr += bp->rx_buffer_size;
+		addr += bp->ctx->rx_buffer_size;
 	}
 	desc->addr |= MACB_BIT(RX_WRAP);
-	queue->rx_tail = 0;
+	rxq->tail = 0;
 }
 
 static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 		   int budget)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	bool reset_rx_queue = false;
 	int first_frag = -1;
 	unsigned int tail;
 	int received = 0;
 
-	for (tail = queue->rx_tail; budget > 0; tail++) {
+	for (tail = rxq->tail; budget > 0; tail++) {
 		struct macb_dma_desc *desc = macb_rx_desc(queue, tail);
 		u32 ctrl;
 
@@ -1833,7 +1871,7 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 		macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE));
 
 		macb_init_rx_ring(queue);
-		queue_writel(queue, RBQP, queue->rx_ring_dma);
+		queue_writel(queue, RBQP, rxq->ring_dma);
 
 		macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
 
@@ -1842,20 +1880,21 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 	}
 
 	if (first_frag != -1)
-		queue->rx_tail = first_frag;
+		rxq->tail = first_frag;
 	else
-		queue->rx_tail = tail;
+		rxq->tail = tail;
 
 	return received;
 }
 
 static bool macb_rx_pending(struct macb_queue *queue)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
 	unsigned int entry;
 
-	entry = macb_rx_ring_wrap(bp, queue->rx_tail);
+	entry = macb_rx_ring_wrap(bp, rxq->tail);
 	desc = macb_rx_desc(queue, entry);
 
 	/* Make hw descriptor updates visible to CPU */
@@ -1903,18 +1942,19 @@ static int macb_rx_poll(struct napi_struct *napi, int budget)
 
 static void macb_tx_restart(struct macb_queue *queue)
 {
+	struct macb_txq *txq = macb_txq(queue);
 	struct macb *bp = queue->bp;
 	unsigned int head_idx, tbqp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
 
-	if (queue->tx_head == queue->tx_tail)
+	if (txq->head == txq->tail)
 		goto out_tx_ptr_unlock;
 
 	tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
 	tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
-	head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, queue->tx_head));
+	head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, txq->head));
 
 	if (tbqp == head_idx)
 		goto out_tx_ptr_unlock;
@@ -1929,15 +1969,16 @@ static void macb_tx_restart(struct macb_queue *queue)
 
 static bool macb_tx_complete_pending(struct macb_queue *queue)
 {
+	struct macb_txq *txq = macb_txq(queue);
 	bool retval = false;
 	unsigned long flags;
 
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
-	if (queue->tx_head != queue->tx_tail) {
+	if (txq->head != txq->tail) {
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
-		if (macb_tx_desc(queue, queue->tx_tail)->ctrl & MACB_BIT(TX_USED))
+		if (macb_tx_desc(queue, txq->tail)->ctrl & MACB_BIT(TX_USED))
 			retval = true;
 	}
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
@@ -2199,8 +2240,9 @@ static unsigned int macb_tx_map(struct macb *bp,
 				struct sk_buff *skb,
 				unsigned int hdrlen)
 {
+	struct macb_txq *txq = macb_txq(queue);
 	unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags;
-	unsigned int len, i, tx_head = queue->tx_head;
+	unsigned int len, i, tx_head = txq->head;
 	u32 ctrl, lso_ctrl = 0, seq_ctrl = 0;
 	unsigned int eof = 1, mss_mfs = 0;
 	struct macb_tx_skb *tx_skb = NULL;
@@ -2320,11 +2362,12 @@ static unsigned int macb_tx_map(struct macb *bp,
 			ctrl |= MACB_BIT(TX_LAST);
 			eof = 0;
 		}
-		if (unlikely(macb_tx_ring_wrap(bp, i) == bp->tx_ring_size - 1))
+		if (unlikely(macb_tx_ring_wrap(bp, i) ==
+				bp->ctx->tx_ring_size - 1))
 			ctrl |= MACB_BIT(TX_WRAP);
 
 		/* First descriptor is header descriptor */
-		if (i == queue->tx_head) {
+		if (i == txq->head) {
 			ctrl |= MACB_BF(TX_LSO, lso_ctrl);
 			ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
 			if ((bp->netdev->features & NETIF_F_HW_CSUM) &&
@@ -2344,16 +2387,16 @@ static unsigned int macb_tx_map(struct macb *bp,
 		 */
 		wmb();
 		desc->ctrl = ctrl;
-	} while (i != queue->tx_head);
+	} while (i != txq->head);
 
-	queue->tx_head = tx_head;
+	txq->head = tx_head;
 
 	return 0;
 
 dma_error:
 	netdev_err(bp->netdev, "TX DMA map failed\n");
 
-	for (i = queue->tx_head; i != tx_head; i++) {
+	for (i = txq->head; i != tx_head; i++) {
 		tx_skb = macb_tx_skb(queue, i);
 
 		macb_tx_unmap(bp, tx_skb, 0);
@@ -2473,6 +2516,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	unsigned int q = skb_get_queue_mapping(skb);
 	unsigned int desc_cnt, nr_frags, frag_size, f;
 	struct macb_queue *queue = &bp->queues[q];
+	struct macb_txq *txq = macb_txq(queue);
 	netdev_tx_t ret = NETDEV_TX_OK;
 	unsigned int hdrlen;
 	unsigned long flags;
@@ -2536,11 +2580,11 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
 
 	/* This is a hard error, log it. */
-	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
-		       bp->tx_ring_size) < desc_cnt) {
+	if (CIRC_SPACE(txq->head, txq->tail,
+		       bp->ctx->tx_ring_size) < desc_cnt) {
 		netif_stop_subqueue(netdev, q);
 		netdev_dbg(netdev, "tx_head = %u, tx_tail = %u\n",
-			   queue->tx_head, queue->tx_tail);
+			   txq->head, txq->tail);
 		ret = NETDEV_TX_BUSY;
 		goto unlock;
 	}
@@ -2562,7 +2606,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 	spin_unlock(&bp->lock);
 
-	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
+	if (CIRC_SPACE(txq->head, txq->tail, bp->ctx->tx_ring_size) < 1)
 		netif_stop_subqueue(netdev, q);
 
 unlock:
@@ -2574,38 +2618,42 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 {
 	if (!macb_is_gem(bp)) {
-		bp->rx_buffer_size = MACB_RX_BUFFER_SIZE;
+		bp->ctx->rx_buffer_size = MACB_RX_BUFFER_SIZE;
 	} else {
-		bp->rx_buffer_size = MIN(size, RX_BUFFER_MAX);
+		bp->ctx->rx_buffer_size = MIN(size, RX_BUFFER_MAX);
 
-		if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) {
+		if (bp->ctx->rx_buffer_size % RX_BUFFER_MULTIPLE) {
 			netdev_dbg(bp->netdev,
 				   "RX buffer must be multiple of %d bytes, expanding\n",
 				   RX_BUFFER_MULTIPLE);
-			bp->rx_buffer_size =
-				roundup(bp->rx_buffer_size, RX_BUFFER_MULTIPLE);
+			bp->ctx->rx_buffer_size =
+				roundup(bp->ctx->rx_buffer_size,
+					RX_BUFFER_MULTIPLE);
 		}
 	}
 
-	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%zu]\n",
-		   bp->netdev->mtu, bp->rx_buffer_size);
+	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%u]\n",
+		   bp->netdev->mtu, bp->ctx->rx_buffer_size);
 }
 
 static void gem_free_rx_buffers(struct macb *bp)
 {
-	struct sk_buff		*skb;
-	struct macb_dma_desc	*desc;
+	struct macb_dma_desc *desc;
 	struct macb_queue *queue;
-	dma_addr_t		addr;
+	struct macb_rxq *rxq;
+	struct sk_buff *skb;
+	dma_addr_t addr;
 	unsigned int q;
 	int i;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		if (!queue->rx_skbuff)
+		rxq = &bp->ctx->rxq[q];
+
+		if (!rxq->skbuff)
 			continue;
 
-		for (i = 0; i < bp->rx_ring_size; i++) {
-			skb = queue->rx_skbuff[i];
+		for (i = 0; i < bp->ctx->rx_ring_size; i++) {
+			skb = rxq->skbuff[i];
 
 			if (!skb)
 				continue;
@@ -2613,95 +2661,106 @@ static void gem_free_rx_buffers(struct macb *bp)
 			desc = macb_rx_desc(queue, i);
 			addr = macb_get_addr(bp, desc);
 
-			dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size,
-					DMA_FROM_DEVICE);
+			dma_unmap_single(&bp->pdev->dev, addr,
+					 bp->ctx->rx_buffer_size,
+					 DMA_FROM_DEVICE);
 			dev_kfree_skb_any(skb);
 			skb = NULL;
 		}
 
-		kfree(queue->rx_skbuff);
-		queue->rx_skbuff = NULL;
+		kfree(rxq->skbuff);
+		rxq->skbuff = NULL;
 	}
 }
 
 static void macb_free_rx_buffers(struct macb *bp)
 {
-	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 
-	if (queue->rx_buffers) {
+	if (rxq->buffers) {
 		dma_free_coherent(&bp->pdev->dev,
-				  bp->rx_ring_size * bp->rx_buffer_size,
-				  queue->rx_buffers, queue->rx_buffers_dma);
-		queue->rx_buffers = NULL;
+				  bp->ctx->rx_ring_size *
+					bp->ctx->rx_buffer_size,
+				  rxq->buffers, rxq->buffers_dma);
+		rxq->buffers = NULL;
 	}
 }
 
 static unsigned int macb_tx_ring_size_per_queue(struct macb *bp)
 {
-	return macb_dma_desc_get_size(bp) * bp->tx_ring_size + bp->tx_bd_rd_prefetch;
+	return macb_dma_desc_get_size(bp) * bp->ctx->tx_ring_size +
+		bp->tx_bd_rd_prefetch;
 }
 
 static unsigned int macb_rx_ring_size_per_queue(struct macb *bp)
 {
-	return macb_dma_desc_get_size(bp) * bp->rx_ring_size + bp->rx_bd_rd_prefetch;
+	return macb_dma_desc_get_size(bp) * bp->ctx->rx_ring_size +
+		bp->rx_bd_rd_prefetch;
 }
 
 static void macb_free_consistent(struct macb *bp)
 {
 	struct device *dev = &bp->pdev->dev;
-	struct macb_queue *queue;
+	struct macb_txq *txq;
+	struct macb_rxq *rxq;
 	unsigned int q;
 	size_t size;
 
 	bp->macbgem_ops.mog_free_rx_buffers(bp);
 
+	txq = &bp->ctx->txq[0];
 	size = bp->num_queues * macb_tx_ring_size_per_queue(bp);
-	dma_free_coherent(dev, size, bp->queues[0].tx_ring, bp->queues[0].tx_ring_dma);
+	dma_free_coherent(dev, size, txq->ring, txq->ring_dma);
 
+	rxq = &bp->ctx->rxq[0];
 	size = bp->num_queues * macb_rx_ring_size_per_queue(bp);
-	dma_free_coherent(dev, size, bp->queues[0].rx_ring, bp->queues[0].rx_ring_dma);
+	dma_free_coherent(dev, size, rxq->ring, rxq->ring_dma);
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		kfree(queue->tx_skb);
-		queue->tx_skb = NULL;
-		queue->tx_ring = NULL;
-		queue->rx_ring = NULL;
+	for (q = 0; q < bp->num_queues; ++q) {
+		txq = &bp->ctx->txq[q];
+		rxq = &bp->ctx->rxq[q];
+
+		kfree(txq->skb);
+		txq->skb = NULL;
+		txq->ring = NULL;
+		rxq->ring = NULL;
 	}
 }
 
 static int gem_alloc_rx_buffers(struct macb *bp)
 {
-	struct macb_queue *queue;
+	struct macb_rxq *rxq;
 	unsigned int q;
 	int size;
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		size = bp->rx_ring_size * sizeof(struct sk_buff *);
-		queue->rx_skbuff = kzalloc(size, GFP_KERNEL);
-		if (!queue->rx_skbuff)
+	for (q = 0; q < bp->num_queues; ++q) {
+		rxq = &bp->ctx->rxq[q];
+		size = bp->ctx->rx_ring_size * sizeof(struct sk_buff *);
+		rxq->skbuff = kzalloc(size, GFP_KERNEL);
+		if (!rxq->skbuff)
 			return -ENOMEM;
 		else
 			netdev_dbg(bp->netdev,
 				   "Allocated %d RX struct sk_buff entries at %p\n",
-				   bp->rx_ring_size, queue->rx_skbuff);
+				   bp->ctx->rx_ring_size, rxq->skbuff);
 	}
 	return 0;
 }
 
 static int macb_alloc_rx_buffers(struct macb *bp)
 {
-	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 	int size;
 
-	size = bp->rx_ring_size * bp->rx_buffer_size;
-	queue->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size,
-					    &queue->rx_buffers_dma, GFP_KERNEL);
-	if (!queue->rx_buffers)
+	size = bp->ctx->rx_ring_size * bp->ctx->rx_buffer_size;
+	rxq->buffers = dma_alloc_coherent(&bp->pdev->dev, size,
+					  &rxq->buffers_dma, GFP_KERNEL);
+	if (!rxq->buffers)
 		return -ENOMEM;
 
 	netdev_dbg(bp->netdev,
 		   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
-		   size, (unsigned long)queue->rx_buffers_dma, queue->rx_buffers);
+		   size, (unsigned long)rxq->buffers_dma, rxq->buffers);
 	return 0;
 }
 
@@ -2709,7 +2768,8 @@ static int macb_alloc_consistent(struct macb *bp)
 {
 	struct device *dev = &bp->pdev->dev;
 	dma_addr_t tx_dma, rx_dma;
-	struct macb_queue *queue;
+	struct macb_txq *txq;
+	struct macb_rxq *rxq;
 	unsigned int q;
 	void *tx, *rx;
 	size_t size;
@@ -2735,16 +2795,19 @@ static int macb_alloc_consistent(struct macb *bp)
 	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
 		   size, bp->num_queues, (unsigned long)rx_dma, rx);
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue->tx_ring = tx + macb_tx_ring_size_per_queue(bp) * q;
-		queue->tx_ring_dma = tx_dma + macb_tx_ring_size_per_queue(bp) * q;
+	for (q = 0; q < bp->num_queues; ++q) {
+		txq = &bp->ctx->txq[q];
+		rxq = &bp->ctx->rxq[q];
 
-		queue->rx_ring = rx + macb_rx_ring_size_per_queue(bp) * q;
-		queue->rx_ring_dma = rx_dma + macb_rx_ring_size_per_queue(bp) * q;
+		txq->ring = tx + macb_tx_ring_size_per_queue(bp) * q;
+		txq->ring_dma = tx_dma + macb_tx_ring_size_per_queue(bp) * q;
 
-		size = bp->tx_ring_size * sizeof(struct macb_tx_skb);
-		queue->tx_skb = kmalloc(size, GFP_KERNEL);
-		if (!queue->tx_skb)
+		rxq->ring = rx + macb_rx_ring_size_per_queue(bp) * q;
+		rxq->ring_dma = rx_dma + macb_rx_ring_size_per_queue(bp) * q;
+
+		size = bp->ctx->tx_ring_size * sizeof(struct macb_tx_skb);
+		txq->skb = kmalloc(size, GFP_KERNEL);
+		if (!txq->skb)
 			goto out_err;
 	}
 	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
@@ -2759,8 +2822,10 @@ static int macb_alloc_consistent(struct macb *bp)
 
 static void gem_init_rx_ring(struct macb_queue *queue)
 {
-	queue->rx_tail = 0;
-	queue->rx_prepared_head = 0;
+	struct macb_rxq *rxq = macb_rxq(queue);
+
+	rxq->tail = 0;
+	rxq->prepared_head = 0;
 
 	gem_rx_refill(queue);
 }
@@ -2769,18 +2834,20 @@ static void gem_init_rings(struct macb *bp)
 {
 	struct macb_queue *queue;
 	struct macb_dma_desc *desc = NULL;
+	struct macb_txq *txq;
 	unsigned int q;
 	int i;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		for (i = 0; i < bp->tx_ring_size; i++) {
+		txq = &bp->ctx->txq[q];
+		for (i = 0; i < bp->ctx->tx_ring_size; i++) {
 			desc = macb_tx_desc(queue, i);
 			macb_set_addr(bp, desc, 0);
 			desc->ctrl = MACB_BIT(TX_USED);
 		}
 		desc->ctrl |= MACB_BIT(TX_WRAP);
-		queue->tx_head = 0;
-		queue->tx_tail = 0;
+		txq->head = 0;
+		txq->tail = 0;
 
 		gem_init_rx_ring(queue);
 	}
@@ -2788,18 +2855,19 @@ static void gem_init_rings(struct macb *bp)
 
 static void macb_init_rings(struct macb *bp)
 {
-	int i;
+	struct macb_txq *txq = &bp->ctx->txq[0];
 	struct macb_dma_desc *desc = NULL;
+	int i;
 
 	macb_init_rx_ring(&bp->queues[0]);
 
-	for (i = 0; i < bp->tx_ring_size; i++) {
+	for (i = 0; i < bp->ctx->tx_ring_size; i++) {
 		desc = macb_tx_desc(&bp->queues[0], i);
 		macb_set_addr(bp, desc, 0);
 		desc->ctrl = MACB_BIT(TX_USED);
 	}
-	bp->queues[0].tx_head = 0;
-	bp->queues[0].tx_tail = 0;
+	txq->head = 0;
+	txq->tail = 0;
 	desc->ctrl |= MACB_BIT(TX_WRAP);
 }
 
@@ -2914,7 +2982,7 @@ static void macb_configure_dma(struct macb *bp)
 	unsigned int q;
 	u32 dmacfg;
 
-	buffer_size = bp->rx_buffer_size / RX_BUFFER_MULTIPLE;
+	buffer_size = bp->ctx->rx_buffer_size / RX_BUFFER_MULTIPLE;
 	if (macb_is_gem(bp)) {
 		dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L);
 		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
@@ -3121,14 +3189,22 @@ static int macb_open(struct net_device *netdev)
 	if (err < 0)
 		return err;
 
+	bp->ctx = kzalloc_obj(*bp->ctx);
+	if (!bp->ctx) {
+		err = -ENOMEM;
+		goto pm_exit;
+	}
+
 	/* RX buffers initialization */
 	macb_init_rx_buffer_size(bp, bufsz);
+	bp->ctx->rx_ring_size = bp->configured_rx_ring_size;
+	bp->ctx->tx_ring_size = bp->configured_tx_ring_size;
 
 	err = macb_alloc_consistent(bp);
 	if (err) {
 		netdev_err(netdev, "Unable to allocate DMA memory (error %d)\n",
 			   err);
-		goto pm_exit;
+		goto free_ctx;
 	}
 
 	bp->macbgem_ops.mog_init_rings(bp);
@@ -3170,6 +3246,9 @@ static int macb_open(struct net_device *netdev)
 		napi_disable(&queue->napi_tx);
 	}
 	macb_free_consistent(bp);
+free_ctx:
+	kfree(bp->ctx);
+	bp->ctx = NULL;
 pm_exit:
 	pm_runtime_put_sync(&bp->pdev->dev);
 	return err;
@@ -3203,6 +3282,8 @@ static int macb_close(struct net_device *netdev)
 	spin_unlock_irqrestore(&bp->lock, flags);
 
 	macb_free_consistent(bp);
+	kfree(bp->ctx);
+	bp->ctx = NULL;
 
 	if (bp->ptp_info)
 		bp->ptp_info->ptp_remove(netdev);
@@ -3568,15 +3649,22 @@ static int macb_get_regs_len(struct net_device *netdev)
 static void macb_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 			  void *p)
 {
+	dma_addr_t tx_dma_tail = 0, tx_dma_head = 0;
 	struct macb *bp = netdev_priv(netdev);
-	unsigned int tail, head;
+	unsigned int tail = 0, head = 0;
+	struct macb_txq *txq;
 	u32 *regs_buff = p;
 
 	regs->version = (macb_readl(bp, MID) & ((1 << MACB_REV_SIZE) - 1))
 			| MACB_GREGS_VERSION;
 
-	tail = macb_tx_ring_wrap(bp, bp->queues[0].tx_tail);
-	head = macb_tx_ring_wrap(bp, bp->queues[0].tx_head);
+	if (bp->ctx) {
+		txq = &bp->ctx->txq[0];
+		tail = macb_tx_ring_wrap(bp, txq->tail);
+		head = macb_tx_ring_wrap(bp, txq->head);
+		tx_dma_tail = macb_tx_dma(&bp->queues[0], tail);
+		tx_dma_head = macb_tx_dma(&bp->queues[0], head);
+	}
 
 	regs_buff[0]  = macb_readl(bp, NCR);
 	regs_buff[1]  = macb_or_gem_readl(bp, NCFGR);
@@ -3589,8 +3677,8 @@ static void macb_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 
 	regs_buff[8]  = tail;
 	regs_buff[9]  = head;
-	regs_buff[10] = macb_tx_dma(&bp->queues[0], tail);
-	regs_buff[11] = macb_tx_dma(&bp->queues[0], head);
+	regs_buff[10] = tx_dma_tail;
+	regs_buff[11] = tx_dma_head;
 
 	if (!(bp->caps & MACB_CAPS_USRIO_DISABLED))
 		regs_buff[12] = macb_or_gem_readl(bp, USRIO);
@@ -3655,8 +3743,8 @@ static void macb_get_ringparam(struct net_device *netdev,
 	ring->rx_max_pending = MAX_RX_RING_SIZE;
 	ring->tx_max_pending = MAX_TX_RING_SIZE;
 
-	ring->rx_pending = bp->rx_ring_size;
-	ring->tx_pending = bp->tx_ring_size;
+	ring->rx_pending = bp->configured_rx_ring_size;
+	ring->tx_pending = bp->configured_tx_ring_size;
 }
 
 static int macb_set_ringparam(struct net_device *netdev,
@@ -3679,8 +3767,8 @@ static int macb_set_ringparam(struct net_device *netdev,
 			      MIN_TX_RING_SIZE, MAX_TX_RING_SIZE);
 	new_tx_size = roundup_pow_of_two(new_tx_size);
 
-	if ((new_tx_size == bp->tx_ring_size) &&
-	    (new_rx_size == bp->rx_ring_size)) {
+	if (new_tx_size == bp->configured_tx_ring_size &&
+	    new_rx_size == bp->configured_rx_ring_size) {
 		/* nothing to do */
 		return 0;
 	}
@@ -3690,8 +3778,8 @@ static int macb_set_ringparam(struct net_device *netdev,
 		macb_close(bp->netdev);
 	}
 
-	bp->rx_ring_size = new_rx_size;
-	bp->tx_ring_size = new_tx_size;
+	bp->configured_rx_ring_size = new_rx_size;
+	bp->configured_tx_ring_size = new_tx_size;
 
 	if (reset)
 		macb_open(bp->netdev);
@@ -4698,9 +4786,6 @@ static int macb_init_dflt(struct platform_device *pdev)
 	int err;
 	u32 val, reg;
 
-	bp->tx_ring_size = DEFAULT_TX_RING_SIZE;
-	bp->rx_ring_size = DEFAULT_RX_RING_SIZE;
-
 	/* set the queue register mapping once for all: queue0 has a special
 	 * register mapping but we don't want to test the queue index then
 	 * compute the corresponding register offset at run time.
@@ -4906,26 +4991,26 @@ static struct sifive_fu540_macb_mgmt *mgmt;
 
 static int at91ether_alloc_coherent(struct macb *bp)
 {
-	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 
-	queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev,
-					    (AT91ETHER_MAX_RX_DESCR *
-					     macb_dma_desc_get_size(bp)),
-					    &queue->rx_ring_dma, GFP_KERNEL);
-	if (!queue->rx_ring)
+	rxq->ring = dma_alloc_coherent(&bp->pdev->dev,
+				       (AT91ETHER_MAX_RX_DESCR *
+					macb_dma_desc_get_size(bp)),
+				       &rxq->ring_dma, GFP_KERNEL);
+	if (!rxq->ring)
 		return -ENOMEM;
 
-	queue->rx_buffers = dma_alloc_coherent(&bp->pdev->dev,
-					       AT91ETHER_MAX_RX_DESCR *
-					       AT91ETHER_MAX_RBUFF_SZ,
-					       &queue->rx_buffers_dma,
-					       GFP_KERNEL);
-	if (!queue->rx_buffers) {
+	rxq->buffers = dma_alloc_coherent(&bp->pdev->dev,
+					  AT91ETHER_MAX_RX_DESCR *
+					  AT91ETHER_MAX_RBUFF_SZ,
+					  &rxq->buffers_dma,
+					  GFP_KERNEL);
+	if (!rxq->buffers) {
 		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
 				  macb_dma_desc_get_size(bp),
-				  queue->rx_ring, queue->rx_ring_dma);
-		queue->rx_ring = NULL;
+				  rxq->ring, rxq->ring_dma);
+		rxq->ring = NULL;
 		return -ENOMEM;
 	}
 
@@ -4934,22 +5019,22 @@ static int at91ether_alloc_coherent(struct macb *bp)
 
 static void at91ether_free_coherent(struct macb *bp)
 {
-	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 
-	if (queue->rx_ring) {
+	if (rxq->ring) {
 		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
 				  macb_dma_desc_get_size(bp),
-				  queue->rx_ring, queue->rx_ring_dma);
-		queue->rx_ring = NULL;
+				  rxq->ring, rxq->ring_dma);
+		rxq->ring = NULL;
 	}
 
-	if (queue->rx_buffers) {
+	if (rxq->buffers) {
 		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
 				  AT91ETHER_MAX_RBUFF_SZ,
-				  queue->rx_buffers, queue->rx_buffers_dma);
-		queue->rx_buffers = NULL;
+				  rxq->buffers, rxq->buffers_dma);
+		rxq->buffers = NULL;
 	}
 }
 
@@ -4957,6 +5042,7 @@ static void at91ether_free_coherent(struct macb *bp)
 static int at91ether_start(struct macb *bp)
 {
 	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 	struct macb_dma_desc *desc;
 	dma_addr_t addr;
 	u32 ctl;
@@ -4966,7 +5052,7 @@ static int at91ether_start(struct macb *bp)
 	if (ret)
 		return ret;
 
-	addr = queue->rx_buffers_dma;
+	addr = rxq->buffers_dma;
 	for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) {
 		desc = macb_rx_desc(queue, i);
 		macb_set_addr(bp, desc, addr);
@@ -4978,10 +5064,10 @@ static int at91ether_start(struct macb *bp)
 	desc->addr |= MACB_BIT(RX_WRAP);
 
 	/* Reset buffer index */
-	queue->rx_tail = 0;
+	rxq->tail = 0;
 
 	/* Program address of descriptor list in Rx Buffer Queue register */
-	macb_writel(bp, RBQP, queue->rx_ring_dma);
+	macb_writel(bp, RBQP, rxq->ring_dma);
 
 	/* Enable Receive and Transmit */
 	ctl = macb_readl(bp, NCR);
@@ -5119,15 +5205,15 @@ static void at91ether_rx(struct net_device *netdev)
 {
 	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
 	struct sk_buff *skb;
 	unsigned int pktlen;
 
-	desc = macb_rx_desc(queue, queue->rx_tail);
+	desc = macb_rx_desc(queue, rxq->tail);
 	while (desc->addr & MACB_BIT(RX_USED)) {
-		p_recv = queue->rx_buffers +
-			 queue->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
+		p_recv = rxq->buffers + rxq->tail * AT91ETHER_MAX_RBUFF_SZ;
 		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
 		skb = netdev_alloc_skb(netdev, pktlen + 2);
 		if (skb) {
@@ -5149,12 +5235,12 @@ static void at91ether_rx(struct net_device *netdev)
 		desc->addr &= ~MACB_BIT(RX_USED);
 
 		/* wrap after last buffer */
-		if (queue->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
-			queue->rx_tail = 0;
+		if (rxq->tail == AT91ETHER_MAX_RX_DESCR - 1)
+			rxq->tail = 0;
 		else
-			queue->rx_tail++;
+			rxq->tail++;
 
-		desc = macb_rx_desc(queue, queue->rx_tail);
+		desc = macb_rx_desc(queue, rxq->tail);
 	}
 }
 
@@ -5807,6 +5893,8 @@ static int macb_probe(struct platform_device *pdev)
 	bp->rx_clk = rx_clk;
 	bp->tsu_clk = tsu_clk;
 	bp->jumbo_max_len = macb_config->jumbo_max_len;
+	bp->configured_rx_ring_size = DEFAULT_RX_RING_SIZE;
+	bp->configured_tx_ring_size = DEFAULT_TX_RING_SIZE;
 
 	if (!hw_is_gem(bp->regs, bp->native_io))
 		bp->max_tx_length = MACB_MAX_TX_LEN;

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 07/14] net: macb: avoid macb_init_rx_buffer_size() modifying state
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

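macb_init_rx_buffer_size() takes the macb private data struct and
overrides its bp->ctx->rx_buffer_size. To make it usable with multiple
contexts, rename it to macb_rx_buffer_size() and make it return the
computed buffer size, leaving it to the caller to store the result.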

Also, move the `bufsz` computation into it. The value is only used on
GEM, and for historical reasons it currently lives in macb_open().

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 3e596cbe9fc8..2eddc7892073 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2615,25 +2615,26 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	return ret;
 }
 
-static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
+static unsigned int macb_rx_buffer_size(struct macb *bp, unsigned int mtu)
 {
-	if (!macb_is_gem(bp)) {
-		bp->ctx->rx_buffer_size = MACB_RX_BUFFER_SIZE;
-	} else {
-		bp->ctx->rx_buffer_size = MIN(size, RX_BUFFER_MAX);
+	unsigned int size;
 
-		if (bp->ctx->rx_buffer_size % RX_BUFFER_MULTIPLE) {
+	if (!macb_is_gem(bp)) {
+		size = MACB_RX_BUFFER_SIZE;
+	} else {
+		size = mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
+		size = MIN(size, RX_BUFFER_MAX);
+
+		if (size % RX_BUFFER_MULTIPLE) {
 			netdev_dbg(bp->netdev,
 				   "RX buffer must be multiple of %d bytes, expanding\n",
 				   RX_BUFFER_MULTIPLE);
-			bp->ctx->rx_buffer_size =
-				roundup(bp->ctx->rx_buffer_size,
-					RX_BUFFER_MULTIPLE);
+			size = roundup(size, RX_BUFFER_MULTIPLE);
 		}
 	}
 
-	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%u]\n",
-		   bp->netdev->mtu, bp->ctx->rx_buffer_size);
+	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%u]\n", mtu, size);
+	return size;
 }
 
 static void gem_free_rx_buffers(struct macb *bp)
@@ -3177,7 +3178,6 @@ static void macb_set_rx_mode(struct net_device *netdev)
 
 static int macb_open(struct net_device *netdev)
 {
-	size_t bufsz = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
 	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int q;
@@ -3196,7 +3196,7 @@ static int macb_open(struct net_device *netdev)
 	}
 
 	/* RX buffers initialization */
-	macb_init_rx_buffer_size(bp, bufsz);
+	bp->ctx->rx_buffer_size = macb_rx_buffer_size(bp, netdev->mtu);
 	bp->ctx->rx_ring_size = bp->configured_rx_ring_size;
 	bp->ctx->tx_ring_size = bp->configured_tx_ring_size;
 

-- 
2.53.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox