Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v3 net-next 10/15] net/sched: sch_fq_pie: annotate data-races in fq_pie_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

fq_pie_dump_stats() acquires the qdisc spinlock a bit too late.

Move this acquisition before we fill tc_fq_pie_xstats with live data.

An alternative would be to add READ_ONCE() and WRITE_ONCE() annotations,
but the spinlock is needed anyway.

Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fq_pie.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 197f0df0a6eb06ab4ce25eefe01d32a35dbd84af..72f48fa4010bebbe6be212938b457db21ff3c5a0 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -509,18 +509,19 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
 static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	struct fq_pie_sched_data *q = qdisc_priv(sch);
-	struct tc_fq_pie_xstats st = {
-		.packets_in	= q->stats.packets_in,
-		.overlimit	= q->stats.overlimit,
-		.overmemory	= q->overmemory,
-		.dropped	= q->stats.dropped,
-		.ecn_mark	= q->stats.ecn_mark,
-		.new_flow_count = q->new_flow_count,
-		.memory_usage   = q->memory_usage,
-	};
+	struct tc_fq_pie_xstats st = { 0 };
 	struct list_head *pos;
 
 	sch_tree_lock(sch);
+
+	st.packets_in	= q->stats.packets_in;
+	st.overlimit	= q->stats.overlimit;
+	st.overmemory	= q->overmemory;
+	st.dropped	= q->stats.dropped;
+	st.ecn_mark	= q->stats.ecn_mark;
+	st.new_flow_count = q->new_flow_count;
+	st.memory_usage   = q->memory_usage;
+
 	list_for_each(pos, &q->new_flows)
 		st.new_flows_len++;
 
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* [PATCH v3 net-next 13/15] net/sched: sch_cake: annotate data-races in cake_dump_stats()
From: Eric Dumazet @ 2026-04-10 18:22 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet, Toke Høiland-Jørgensen
In-Reply-To: <20260410182257.774311-1-edumazet@google.com>

cake_dump_stats() and cake_dump_class_stats() run without qdisc
spinlock being held.

Add READ_ONCE()/WRITE_ONCE() annotations.

Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: "Toke Høiland-Jørgensen" <toke@toke.dk>
---
 net/sched/sch_cake.c | 404 ++++++++++++++++++++++++-------------------
 1 file changed, 225 insertions(+), 179 deletions(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 32e672820c00a88c6d8fe77a6308405e016525ea..f523f0aa4d830e9d3ec4d43bb123e1dc4f8f289d 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -399,14 +399,14 @@ static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust);
  * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
  */
 
-static void cobalt_newton_step(struct cobalt_vars *vars)
+static void cobalt_newton_step(struct cobalt_vars *vars, u32 count)
 {
 	u32 invsqrt, invsqrt2;
 	u64 val;
 
 	invsqrt = vars->rec_inv_sqrt;
 	invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
-	val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+	val = (3LL << 32) - ((u64)count * invsqrt2);
 
 	val >>= 2; /* avoid overflow in following multiply */
 	val = (val * invsqrt) >> (32 - 2 + 1);
@@ -414,12 +414,12 @@ static void cobalt_newton_step(struct cobalt_vars *vars)
 	vars->rec_inv_sqrt = val;
 }
 
-static void cobalt_invsqrt(struct cobalt_vars *vars)
+static void cobalt_invsqrt(struct cobalt_vars *vars, u32 count)
 {
-	if (vars->count < REC_INV_SQRT_CACHE)
-		vars->rec_inv_sqrt = inv_sqrt_cache[vars->count];
+	if (count < REC_INV_SQRT_CACHE)
+		vars->rec_inv_sqrt = inv_sqrt_cache[count];
 	else
-		cobalt_newton_step(vars);
+		cobalt_newton_step(vars, count);
 }
 
 static void cobalt_vars_init(struct cobalt_vars *vars)
@@ -449,16 +449,19 @@ static bool cobalt_queue_full(struct cobalt_vars *vars,
 	bool up = false;
 
 	if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
-		up = !vars->p_drop;
-		vars->p_drop += p->p_inc;
-		if (vars->p_drop < p->p_inc)
-			vars->p_drop = ~0;
-		vars->blue_timer = now;
-	}
-	vars->dropping = true;
-	vars->drop_next = now;
+		u32 p_drop = vars->p_drop;
+
+		up = !p_drop;
+		p_drop += p->p_inc;
+		if (p_drop < p->p_inc)
+			p_drop = ~0;
+		WRITE_ONCE(vars->p_drop, p_drop);
+		WRITE_ONCE(vars->blue_timer, now);
+	}
+	WRITE_ONCE(vars->dropping, true);
+	WRITE_ONCE(vars->drop_next, now);
 	if (!vars->count)
-		vars->count = 1;
+		WRITE_ONCE(vars->count, 1);
 
 	return up;
 }
@@ -474,21 +477,25 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars,
 
 	if (vars->p_drop &&
 	    ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
-		if (vars->p_drop < p->p_dec)
-			vars->p_drop = 0;
+		u32 p_drop = vars->p_drop;
+
+		if (p_drop < p->p_dec)
+			p_drop = 0;
 		else
-			vars->p_drop -= p->p_dec;
-		vars->blue_timer = now;
-		down = !vars->p_drop;
+			p_drop -= p->p_dec;
+		WRITE_ONCE(vars->p_drop, p_drop);
+		WRITE_ONCE(vars->blue_timer, now);
+		down = !p_drop;
 	}
-	vars->dropping = false;
+	WRITE_ONCE(vars->dropping, false);
 
 	if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
-		vars->count--;
-		cobalt_invsqrt(vars);
-		vars->drop_next = cobalt_control(vars->drop_next,
-						 p->interval,
-						 vars->rec_inv_sqrt);
+		WRITE_ONCE(vars->count, vars->count - 1);
+		cobalt_invsqrt(vars, vars->count);
+		WRITE_ONCE(vars->drop_next,
+			   cobalt_control(vars->drop_next,
+					  p->interval,
+					  vars->rec_inv_sqrt));
 	}
 
 	return down;
@@ -507,6 +514,7 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
 	bool next_due, over_target;
 	ktime_t schedule;
 	u64 sojourn;
+	u32 count;
 
 /* The 'schedule' variable records, in its sign, whether 'now' is before or
  * after 'drop_next'.  This allows 'drop_next' to be updated before the next
@@ -528,45 +536,50 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
 	over_target = sojourn > p->target &&
 		      sojourn > p->mtu_time * bulk_flows * 2 &&
 		      sojourn > p->mtu_time * 4;
-	next_due = vars->count && ktime_to_ns(schedule) >= 0;
+	count = vars->count;
+	next_due = count && ktime_to_ns(schedule) >= 0;
 
 	vars->ecn_marked = false;
 
 	if (over_target) {
 		if (!vars->dropping) {
-			vars->dropping = true;
-			vars->drop_next = cobalt_control(now,
-							 p->interval,
-							 vars->rec_inv_sqrt);
+			WRITE_ONCE(vars->dropping, true);
+			WRITE_ONCE(vars->drop_next,
+				   cobalt_control(now,
+						  p->interval,
+						  vars->rec_inv_sqrt));
 		}
-		if (!vars->count)
-			vars->count = 1;
+		if (!count)
+			count = 1;
 	} else if (vars->dropping) {
-		vars->dropping = false;
+		WRITE_ONCE(vars->dropping, false);
 	}
 
 	if (next_due && vars->dropping) {
 		/* Use ECN mark if possible, otherwise drop */
-		if (!(vars->ecn_marked = INET_ECN_set_ce(skb)))
+		vars->ecn_marked = INET_ECN_set_ce(skb);
+		if (!vars->ecn_marked)
 			reason = QDISC_DROP_CONGESTED;
 
-		vars->count++;
-		if (!vars->count)
-			vars->count--;
-		cobalt_invsqrt(vars);
-		vars->drop_next = cobalt_control(vars->drop_next,
-						 p->interval,
-						 vars->rec_inv_sqrt);
+		count++;
+		if (!count)
+			count--;
+		cobalt_invsqrt(vars, count);
+		WRITE_ONCE(vars->drop_next,
+			   cobalt_control(vars->drop_next,
+					  p->interval,
+					  vars->rec_inv_sqrt));
 		schedule = ktime_sub(now, vars->drop_next);
 	} else {
 		while (next_due) {
-			vars->count--;
-			cobalt_invsqrt(vars);
-			vars->drop_next = cobalt_control(vars->drop_next,
-							 p->interval,
-							 vars->rec_inv_sqrt);
+			count--;
+			cobalt_invsqrt(vars, count);
+			WRITE_ONCE(vars->drop_next,
+				   cobalt_control(vars->drop_next,
+						  p->interval,
+						  vars->rec_inv_sqrt));
 			schedule = ktime_sub(now, vars->drop_next);
-			next_due = vars->count && ktime_to_ns(schedule) >= 0;
+			next_due = count && ktime_to_ns(schedule) >= 0;
 		}
 	}
 
@@ -575,11 +588,12 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars,
 	    get_random_u32() < vars->p_drop)
 		reason = QDISC_DROP_FLOOD_PROTECTION;
 
+	WRITE_ONCE(vars->count, count);
 	/* Overload the drop_next field as an activity timeout */
-	if (!vars->count)
-		vars->drop_next = ktime_add_ns(now, p->interval);
+	if (!count)
+		WRITE_ONCE(vars->drop_next, ktime_add_ns(now, p->interval));
 	else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC)
-		vars->drop_next = now;
+		WRITE_ONCE(vars->drop_next, now);
 
 	return reason;
 }
@@ -813,7 +827,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		     i++, k = (k + 1) % CAKE_SET_WAYS) {
 			if (q->tags[outer_hash + k] == flow_hash) {
 				if (i)
-					q->way_hits++;
+					WRITE_ONCE(q->way_hits, q->way_hits + 1);
 
 				if (!q->flows[outer_hash + k].set) {
 					/* need to increment host refcnts */
@@ -831,7 +845,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		for (i = 0; i < CAKE_SET_WAYS;
 			 i++, k = (k + 1) % CAKE_SET_WAYS) {
 			if (!q->flows[outer_hash + k].set) {
-				q->way_misses++;
+				WRITE_ONCE(q->way_misses, q->way_misses + 1);
 				allocate_src = cake_dsrc(flow_mode);
 				allocate_dst = cake_ddst(flow_mode);
 				goto found;
@@ -841,7 +855,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		/* With no empty queues, default to the original
 		 * queue, accept the collision, update the host tags.
 		 */
-		q->way_collisions++;
+		WRITE_ONCE(q->way_collisions, q->way_collisions + 1);
 		allocate_src = cake_dsrc(flow_mode);
 		allocate_dst = cake_ddst(flow_mode);
 
@@ -875,7 +889,8 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 			q->flows[reduced_hash].srchost = srchost_idx;
 
 			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
-				cake_inc_srchost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode);
+				cake_inc_srchost_bulk_flow_count(q, &q->flows[reduced_hash],
+								 flow_mode);
 		}
 
 		if (allocate_dst) {
@@ -899,7 +914,8 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 			q->flows[reduced_hash].dsthost = dsthost_idx;
 
 			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
-				cake_inc_dsthost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode);
+				cake_inc_dsthost_bulk_flow_count(q, &q->flows[reduced_hash],
+								 flow_mode);
 		}
 	}
 
@@ -1379,9 +1395,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off)
 		len -= off;
 
 	if (qd->max_netlen < len)
-		qd->max_netlen = len;
+		WRITE_ONCE(qd->max_netlen, len);
 	if (qd->min_netlen > len)
-		qd->min_netlen = len;
+		WRITE_ONCE(qd->min_netlen, len);
 
 	len += q->rate_overhead;
 
@@ -1401,9 +1417,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off)
 	}
 
 	if (qd->max_adjlen < len)
-		qd->max_adjlen = len;
+		WRITE_ONCE(qd->max_adjlen, len);
 	if (qd->min_adjlen > len)
-		qd->min_adjlen = len;
+		WRITE_ONCE(qd->min_adjlen, len);
 
 	return len;
 }
@@ -1416,7 +1432,7 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
 	u16 segs = qdisc_pkt_segs(skb);
 	u32 len = qdisc_pkt_len(skb);
 
-	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+	WRITE_ONCE(q->avg_netoff, cake_ewma(q->avg_netoff, off << 16, 8));
 
 	if (segs == 1)
 		return cake_calc_overhead(q, len, off);
@@ -1590,16 +1606,17 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
 	}
 
 	if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
-		b->unresponsive_flow_count++;
+		WRITE_ONCE(b->unresponsive_flow_count,
+			   b->unresponsive_flow_count + 1);
 
 	len = qdisc_pkt_len(skb);
 	q->buffer_used      -= skb->truesize;
-	b->backlogs[idx]    -= len;
-	b->tin_backlog      -= len;
+	WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] - len);
+	WRITE_ONCE(b->tin_backlog, b->tin_backlog - len);
 	qstats_backlog_sub(sch, len);
 
-	flow->dropped++;
-	b->tin_dropped++;
+	WRITE_ONCE(flow->dropped, flow->dropped + 1);
+	WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1);
 
 	if (q->config->rate_flags & CAKE_FLAG_INGRESS)
 		cake_advance_shaper(q, b, skb, now, true);
@@ -1795,7 +1812,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	}
 
 	if (unlikely(len > b->max_skblen))
-		b->max_skblen = len;
+		WRITE_ONCE(b->max_skblen, len);
 
 	if (qdisc_pkt_segs(skb) > 1 && q->config->rate_flags & CAKE_FLAG_SPLIT_GSO) {
 		struct sk_buff *segs, *nskb;
@@ -1819,13 +1836,13 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			numsegs++;
 			slen += segs->len;
 			q->buffer_used += segs->truesize;
-			b->packets++;
 		}
 
 		/* stats */
-		b->bytes	    += slen;
-		b->backlogs[idx]    += slen;
-		b->tin_backlog      += slen;
+		WRITE_ONCE(b->bytes, b->bytes + slen);
+		WRITE_ONCE(b->packets, b->packets + numsegs);
+		WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + slen);
+		WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen);
 		qstats_backlog_add(sch, slen);
 		q->avg_window_bytes += slen;
 
@@ -1843,10 +1860,10 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			ack = cake_ack_filter(q, flow);
 
 		if (ack) {
-			b->ack_drops++;
+			WRITE_ONCE(b->ack_drops, b->ack_drops + 1);
 			qdisc_qstats_drop(sch);
 			ack_pkt_len = qdisc_pkt_len(ack);
-			b->bytes += ack_pkt_len;
+			WRITE_ONCE(b->bytes, b->bytes + ack_pkt_len);
 			q->buffer_used += skb->truesize - ack->truesize;
 			if (q->config->rate_flags & CAKE_FLAG_INGRESS)
 				cake_advance_shaper(q, b, ack, now, true);
@@ -1859,10 +1876,10 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		}
 
 		/* stats */
-		b->packets++;
-		b->bytes	    += len - ack_pkt_len;
-		b->backlogs[idx]    += len - ack_pkt_len;
-		b->tin_backlog      += len - ack_pkt_len;
+		WRITE_ONCE(b->packets, b->packets + 1);
+		WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len);
+		WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + len - ack_pkt_len);
+		WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len);
 		qstats_backlog_add(sch, len - ack_pkt_len);
 		q->avg_window_bytes += len - ack_pkt_len;
 	}
@@ -1894,9 +1911,9 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
 
 			b = div64_u64(b, window_interval);
-			q->avg_peak_bandwidth =
-				cake_ewma(q->avg_peak_bandwidth, b,
-					  b > q->avg_peak_bandwidth ? 2 : 8);
+			WRITE_ONCE(q->avg_peak_bandwidth,
+				   cake_ewma(q->avg_peak_bandwidth, b,
+					     b > q->avg_peak_bandwidth ? 2 : 8));
 			q->avg_window_bytes = 0;
 			q->avg_window_begin = now;
 
@@ -1917,27 +1934,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		if (!flow->set) {
 			list_add_tail(&flow->flowchain, &b->new_flows);
 		} else {
-			b->decaying_flow_count--;
+			WRITE_ONCE(b->decaying_flow_count,
+				   b->decaying_flow_count - 1);
 			list_move_tail(&flow->flowchain, &b->new_flows);
 		}
 		flow->set = CAKE_SET_SPARSE;
-		b->sparse_flow_count++;
+		WRITE_ONCE(b->sparse_flow_count,
+			   b->sparse_flow_count + 1);
 
-		flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode);
+		WRITE_ONCE(flow->deficit,
+			   cake_get_flow_quantum(b, flow, q->config->flow_mode));
 	} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
 		/* this flow was empty, accounted as a sparse flow, but actually
 		 * in the bulk rotation.
 		 */
 		flow->set = CAKE_SET_BULK;
-		b->sparse_flow_count--;
-		b->bulk_flow_count++;
+		WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1);
+		WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count + 1);
 
 		cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
 		cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
 	}
 
 	if (q->buffer_used > q->buffer_max_used)
-		q->buffer_max_used = q->buffer_used;
+		WRITE_ONCE(q->buffer_max_used, q->buffer_used);
 
 	if (q->buffer_used <= q->buffer_limit)
 		return NET_XMIT_SUCCESS;
@@ -1976,8 +1996,8 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
 	if (flow->head) {
 		skb = dequeue_head(flow);
 		len = qdisc_pkt_len(skb);
-		b->backlogs[q->cur_flow] -= len;
-		b->tin_backlog		 -= len;
+		WRITE_ONCE(b->backlogs[q->cur_flow], b->backlogs[q->cur_flow] - len);
+		WRITE_ONCE(b->tin_backlog, b->tin_backlog - len);
 		qstats_backlog_sub(sch, len);
 		q->buffer_used		 -= skb->truesize;
 		qdisc_qlen_dec(sch);
@@ -2042,7 +2062,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 
 		cake_configure_rates(sch, new_rate, true);
 		q->last_checked_active = now;
-		q->active_queues = num_active_qs;
+		WRITE_ONCE(q->active_queues, num_active_qs);
 	}
 
 begin:
@@ -2149,8 +2169,10 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 		 */
 		if (flow->set == CAKE_SET_SPARSE) {
 			if (flow->head) {
-				b->sparse_flow_count--;
-				b->bulk_flow_count++;
+				WRITE_ONCE(b->sparse_flow_count,
+					   b->sparse_flow_count - 1);
+				WRITE_ONCE(b->bulk_flow_count,
+					   b->bulk_flow_count + 1);
 
 				cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
 				cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
@@ -2165,7 +2187,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 			}
 		}
 
-		flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode);
+		WRITE_ONCE(flow->deficit,
+			   flow->deficit + cake_get_flow_quantum(b, flow, q->config->flow_mode));
 		list_move_tail(&flow->flowchain, &b->old_flows);
 
 		goto retry;
@@ -2177,7 +2200,8 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 		if (!skb) {
 			/* this queue was actually empty */
 			if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
-				b->unresponsive_flow_count--;
+				WRITE_ONCE(b->unresponsive_flow_count,
+					   b->unresponsive_flow_count - 1);
 
 			if (flow->cvars.p_drop || flow->cvars.count ||
 			    ktime_before(now, flow->cvars.drop_next)) {
@@ -2187,16 +2211,22 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 				list_move_tail(&flow->flowchain,
 					       &b->decaying_flows);
 				if (flow->set == CAKE_SET_BULK) {
-					b->bulk_flow_count--;
+					WRITE_ONCE(b->bulk_flow_count,
+						   b->bulk_flow_count - 1);
 
-					cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
-					cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
+					cake_dec_srchost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
+					cake_dec_dsthost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
 
-					b->decaying_flow_count++;
+					WRITE_ONCE(b->decaying_flow_count,
+						   b->decaying_flow_count + 1);
 				} else if (flow->set == CAKE_SET_SPARSE ||
 					   flow->set == CAKE_SET_SPARSE_WAIT) {
-					b->sparse_flow_count--;
-					b->decaying_flow_count++;
+					WRITE_ONCE(b->sparse_flow_count,
+						   b->sparse_flow_count - 1);
+					WRITE_ONCE(b->decaying_flow_count,
+						   b->decaying_flow_count + 1);
 				}
 				flow->set = CAKE_SET_DECAYING;
 			} else {
@@ -2204,14 +2234,20 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 				list_del_init(&flow->flowchain);
 				if (flow->set == CAKE_SET_SPARSE ||
 				    flow->set == CAKE_SET_SPARSE_WAIT)
-					b->sparse_flow_count--;
+					WRITE_ONCE(b->sparse_flow_count,
+						   b->sparse_flow_count - 1);
 				else if (flow->set == CAKE_SET_BULK) {
-					b->bulk_flow_count--;
+					WRITE_ONCE(b->bulk_flow_count,
+						   b->bulk_flow_count - 1);
 
-					cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode);
-					cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode);
-				} else
-					b->decaying_flow_count--;
+					cake_dec_srchost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
+					cake_dec_dsthost_bulk_flow_count(b, flow,
+									 q->config->flow_mode);
+				} else {
+					WRITE_ONCE(b->decaying_flow_count,
+						   b->decaying_flow_count - 1);
+				}
 
 				flow->set = CAKE_SET_NONE;
 			}
@@ -2230,11 +2266,11 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 		if (q->config->rate_flags & CAKE_FLAG_INGRESS) {
 			len = cake_advance_shaper(q, b, skb,
 						  now, true);
-			flow->deficit -= len;
+			WRITE_ONCE(flow->deficit, flow->deficit - len);
 			b->tin_deficit -= len;
 		}
-		flow->dropped++;
-		b->tin_dropped++;
+		WRITE_ONCE(flow->dropped, flow->dropped + 1);
+		WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1);
 		qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
 		qdisc_qstats_drop(sch);
 		qdisc_dequeue_drop(sch, skb, reason);
@@ -2242,20 +2278,22 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
 			goto retry;
 	}
 
-	b->tin_ecn_mark += !!flow->cvars.ecn_marked;
+	WRITE_ONCE(b->tin_ecn_mark, b->tin_ecn_mark + !!flow->cvars.ecn_marked);
 	qdisc_bstats_update(sch, skb);
 	WRITE_ONCE(q->last_active, now);
 
 	/* collect delay stats */
 	delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
-	b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
-	b->peak_delay = cake_ewma(b->peak_delay, delay,
-				  delay > b->peak_delay ? 2 : 8);
-	b->base_delay = cake_ewma(b->base_delay, delay,
-				  delay < b->base_delay ? 2 : 8);
+	WRITE_ONCE(b->avge_delay, cake_ewma(b->avge_delay, delay, 8));
+	WRITE_ONCE(b->peak_delay,
+		   cake_ewma(b->peak_delay, delay,
+			     delay > b->peak_delay ? 2 : 8));
+	WRITE_ONCE(b->base_delay,
+		   cake_ewma(b->base_delay, delay,
+			     delay < b->base_delay ? 2 : 8));
 
 	len = cake_advance_shaper(q, b, skb, now, false);
-	flow->deficit -= len;
+	WRITE_ONCE(flow->deficit, flow->deficit - len);
 	b->tin_deficit -= len;
 
 	if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
@@ -2329,9 +2367,8 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
 	u8  rate_shft = 0;
 	u64 rate_ns = 0;
 
-	b->flow_quantum = 1514;
 	if (rate) {
-		b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
+		WRITE_ONCE(b->flow_quantum, max(min(rate >> 12, 1514ULL), 300ULL));
 		rate_shft = 34;
 		rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
 		rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
@@ -2339,9 +2376,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
 			rate_ns >>= 1;
 			rate_shft--;
 		}
-	} /* else unlimited, ie. zero delay */
-
-	b->tin_rate_bps  = rate;
+	} else {
+		/* else unlimited, ie. zero delay */
+		WRITE_ONCE(b->flow_quantum, 1514);
+	}
+	WRITE_ONCE(b->tin_rate_bps, rate);
 	b->tin_rate_ns   = rate_ns;
 	b->tin_rate_shft = rate_shft;
 
@@ -2350,10 +2389,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
 
 	byte_target_ns = (byte_target * rate_ns) >> rate_shft;
 
-	b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
-	b->cparams.interval = max(rtt_est_ns +
-				     b->cparams.target - target_ns,
-				     b->cparams.target * 2);
+	WRITE_ONCE(b->cparams.target,
+		   max((byte_target_ns * 3) / 2, target_ns));
+	WRITE_ONCE(b->cparams.interval,
+		   max(rtt_est_ns + b->cparams.target - target_ns,
+		       b->cparams.target * 2));
 	b->cparams.mtu_time = byte_target_ns;
 	b->cparams.p_inc = 1 << 24; /* 1/256 */
 	b->cparams.p_dec = 1 << 20; /* 1/4096 */
@@ -2611,25 +2651,27 @@ static void cake_reconfigure(struct Qdisc *sch)
 {
 	struct cake_sched_data *qd = qdisc_priv(sch);
 	struct cake_sched_config *q = qd->config;
+	u32 buffer_limit;
 
 	cake_configure_rates(sch, qd->config->rate_bps, false);
 
 	if (q->buffer_config_limit) {
-		qd->buffer_limit = q->buffer_config_limit;
+		buffer_limit = q->buffer_config_limit;
 	} else if (q->rate_bps) {
 		u64 t = q->rate_bps * q->interval;
 
 		do_div(t, USEC_PER_SEC / 4);
-		qd->buffer_limit = max_t(u32, t, 4U << 20);
+		buffer_limit = max_t(u32, t, 4U << 20);
 	} else {
-		qd->buffer_limit = ~0;
+		buffer_limit = ~0;
 	}
 
 	sch->flags &= ~TCQ_F_CAN_BYPASS;
 
-	qd->buffer_limit = min(qd->buffer_limit,
-			       max(sch->limit * psched_mtu(qdisc_dev(sch)),
-				   q->buffer_config_limit));
+	WRITE_ONCE(qd->buffer_limit,
+		   min(buffer_limit,
+		       max(sch->limit * psched_mtu(qdisc_dev(sch)),
+			   q->buffer_config_limit)));
 }
 
 static int cake_config_change(struct cake_sched_config *q, struct nlattr *opt,
@@ -2774,10 +2816,10 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 		return ret;
 
 	if (overhead_changed) {
-		qd->max_netlen = 0;
-		qd->max_adjlen = 0;
-		qd->min_netlen = ~0;
-		qd->min_adjlen = ~0;
+		WRITE_ONCE(qd->max_netlen, 0);
+		WRITE_ONCE(qd->max_adjlen, 0);
+		WRITE_ONCE(qd->min_netlen, ~0);
+		WRITE_ONCE(qd->min_adjlen, ~0);
 	}
 
 	if (qd->tins) {
@@ -2995,15 +3037,15 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 			goto nla_put_failure;			       \
 	} while (0)
 
-	PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
-	PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
-	PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
-	PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
-	PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
-	PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
-	PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
-	PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
-	PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues);
+	PUT_STAT_U64(CAPACITY_ESTIMATE64, READ_ONCE(q->avg_peak_bandwidth));
+	PUT_STAT_U32(MEMORY_LIMIT, READ_ONCE(q->buffer_limit));
+	PUT_STAT_U32(MEMORY_USED, READ_ONCE(q->buffer_max_used));
+	PUT_STAT_U32(AVG_NETOFF, ((READ_ONCE(q->avg_netoff) + 0x8000) >> 16));
+	PUT_STAT_U32(MAX_NETLEN, READ_ONCE(q->max_netlen));
+	PUT_STAT_U32(MAX_ADJLEN, READ_ONCE(q->max_adjlen));
+	PUT_STAT_U32(MIN_NETLEN, READ_ONCE(q->min_netlen));
+	PUT_STAT_U32(MIN_ADJLEN, READ_ONCE(q->min_adjlen));
+	PUT_STAT_U32(ACTIVE_QUEUES, READ_ONCE(q->active_queues));
 
 #undef PUT_STAT_U32
 #undef PUT_STAT_U64
@@ -3029,38 +3071,38 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		if (!ts)
 			goto nla_put_failure;
 
-		PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
-		PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
-		PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
+		PUT_TSTAT_U64(THRESHOLD_RATE64, READ_ONCE(b->tin_rate_bps));
+		PUT_TSTAT_U64(SENT_BYTES64, READ_ONCE(b->bytes));
+		PUT_TSTAT_U32(BACKLOG_BYTES, READ_ONCE(b->tin_backlog));
 
 		PUT_TSTAT_U32(TARGET_US,
-			      ktime_to_us(ns_to_ktime(b->cparams.target)));
+			ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.target))));
 		PUT_TSTAT_U32(INTERVAL_US,
-			      ktime_to_us(ns_to_ktime(b->cparams.interval)));
+			ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.interval))));
 
-		PUT_TSTAT_U32(SENT_PACKETS, b->packets);
-		PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
-		PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
-		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
+		PUT_TSTAT_U32(SENT_PACKETS, READ_ONCE(b->packets));
+		PUT_TSTAT_U32(DROPPED_PACKETS, READ_ONCE(b->tin_dropped));
+		PUT_TSTAT_U32(ECN_MARKED_PACKETS, READ_ONCE(b->tin_ecn_mark));
+		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, READ_ONCE(b->ack_drops));
 
 		PUT_TSTAT_U32(PEAK_DELAY_US,
-			      ktime_to_us(ns_to_ktime(b->peak_delay)));
+			      ktime_to_us(ns_to_ktime(READ_ONCE(b->peak_delay))));
 		PUT_TSTAT_U32(AVG_DELAY_US,
-			      ktime_to_us(ns_to_ktime(b->avge_delay)));
+			      ktime_to_us(ns_to_ktime(READ_ONCE(b->avge_delay))));
 		PUT_TSTAT_U32(BASE_DELAY_US,
-			      ktime_to_us(ns_to_ktime(b->base_delay)));
+			      ktime_to_us(ns_to_ktime(READ_ONCE(b->base_delay))));
 
-		PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
-		PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
-		PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
+		PUT_TSTAT_U32(WAY_INDIRECT_HITS, READ_ONCE(b->way_hits));
+		PUT_TSTAT_U32(WAY_MISSES, READ_ONCE(b->way_misses));
+		PUT_TSTAT_U32(WAY_COLLISIONS, READ_ONCE(b->way_collisions));
 
-		PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
-					    b->decaying_flow_count);
-		PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
-		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
-		PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
+		PUT_TSTAT_U32(SPARSE_FLOWS, READ_ONCE(b->sparse_flow_count) +
+					    READ_ONCE(b->decaying_flow_count));
+		PUT_TSTAT_U32(BULK_FLOWS, READ_ONCE(b->bulk_flow_count));
+		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, READ_ONCE(b->unresponsive_flow_count));
+		PUT_TSTAT_U32(MAX_SKBLEN, READ_ONCE(b->max_skblen));
 
-		PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
+		PUT_TSTAT_U32(FLOW_QUANTUM, READ_ONCE(b->flow_quantum));
 		nla_nest_end(d->skb, ts);
 	}
 
@@ -3128,7 +3170,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 
 		flow = &b->flows[idx % CAKE_QUEUES];
 
-		if (flow->head) {
+		if (READ_ONCE(flow->head)) {
 			sch_tree_lock(sch);
 			skb = flow->head;
 			while (skb) {
@@ -3137,13 +3179,15 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			}
 			sch_tree_unlock(sch);
 		}
-		qs.backlog = b->backlogs[idx % CAKE_QUEUES];
-		qs.drops = flow->dropped;
+		qs.backlog = READ_ONCE(b->backlogs[idx % CAKE_QUEUES]);
+		qs.drops = READ_ONCE(flow->dropped);
 	}
 	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
 		return -1;
 	if (flow) {
 		ktime_t now = ktime_get();
+		bool dropping;
+		u32 p_drop;
 
 		stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
 		if (!stats)
@@ -3158,21 +3202,23 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			goto nla_put_failure;			       \
 	} while (0)
 
-		PUT_STAT_S32(DEFICIT, flow->deficit);
-		PUT_STAT_U32(DROPPING, flow->cvars.dropping);
-		PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
-		PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
-		if (flow->cvars.p_drop) {
+		PUT_STAT_S32(DEFICIT, READ_ONCE(flow->deficit));
+		dropping = READ_ONCE(flow->cvars.dropping);
+		PUT_STAT_U32(DROPPING, dropping);
+		PUT_STAT_U32(COBALT_COUNT, READ_ONCE(flow->cvars.count));
+		p_drop = READ_ONCE(flow->cvars.p_drop);
+		PUT_STAT_U32(P_DROP, p_drop);
+		if (p_drop) {
 			PUT_STAT_S32(BLUE_TIMER_US,
 				     ktime_to_us(
 					     ktime_sub(now,
-						       flow->cvars.blue_timer)));
+						       READ_ONCE(flow->cvars.blue_timer))));
 		}
-		if (flow->cvars.dropping) {
+		if (dropping) {
 			PUT_STAT_S32(DROP_NEXT_US,
 				     ktime_to_us(
 					     ktime_sub(now,
-						       flow->cvars.drop_next)));
+						       READ_ONCE(flow->cvars.drop_next))));
 		}
 
 		if (nla_nest_end(d->skb, stats) < 0)
@@ -3298,10 +3344,10 @@ static int cake_mq_change(struct Qdisc *sch, struct nlattr *opt,
 		struct cake_sched_data *qd = qdisc_priv(chld);
 
 		if (overhead_changed) {
-			qd->max_netlen = 0;
-			qd->max_adjlen = 0;
-			qd->min_netlen = ~0;
-			qd->min_adjlen = ~0;
+			WRITE_ONCE(qd->max_netlen, 0);
+			WRITE_ONCE(qd->max_adjlen, 0);
+			WRITE_ONCE(qd->min_netlen, ~0);
+			WRITE_ONCE(qd->min_adjlen, ~0);
 		}
 
 		if (qd->tins) {
-- 
2.53.0.1213.gd9a14994de-goog


^ permalink raw reply related

* Re: [ovs-dev] [PATCH net-next v2] net: openvswitch: decouple flow_table from ovs_mutex
From: Aaron Conole @ 2026-04-10 18:52 UTC (permalink / raw)
  To: Adrian Moreno via dev
  Cc: netdev, Adrian Moreno, open list:OPENVSWITCH, Paolo Abeni,
	open list, Ilya Maximets, Eric Dumazet, Simon Horman,
	Jakub Kicinski, David S. Miller
In-Reply-To: <20260407120418.356718-1-amorenoz@redhat.com>

Hi Adrian,

Thanks for the patch.  A few questions inline.

Adrian Moreno via dev <ovs-dev@openvswitch.org> writes:

> Currently the entire ovs module is write-protected using the global
> ovs_mutex. While this simple approach works fine for control-plane
> operations (such as vport configurations), requiring the global mutex
> for flow modifications can be problematic.
>
> During periods of high control-plane operations, e.g: netdevs (vports)
> coming and going, RTNL can suffer contention. This contention is easily
> transferred to the ovs_mutex as RTNL nests inside ovs_mutex. Flow
> modifications, however, are done as part of packet processing and having
> them wait for RTNL pressure to go away can lead to packet drops.
>
> This patch decouples flow_table modifications from ovs_mutex by means of
> the following:
>
> 1 - Make flow_table an rcu-protected pointer inside the datapath.
> This allows both objects to be protected independently while reducing the
> amount of changes required in "flow_table.c".
>
> 2 - Create a new mutex inside the flow_table that protects it from
> concurrent modifications.
> Putting the mutex inside flow_table makes it easier to consume for
> functions inside flow_table.c that do not currently take pointers to the
> datapath.
> Some function signatures need to be changed to accept flow_table so that
> lockdep checks can be performed.
>
> 3 - Create a reference count to temporarily extend rcu protection from
> the datapath to the flow_table.
> In order to use the flow_table without locking ovs_mutex, the flow_table
> pointer must be first dereferenced within an rcu-protected region.
> Next, the table->mutex needs to be locked to protect it from
> concurrent writes but mutexes must not be locked inside an rcu-protected
> region, so the rcu-protected region must be left at which point the
> datapath can be concurrently freed.
> To extend the protection beyond the rcu region, a reference count is used.
> One reference is held by the datapath, the other is temporarily
> increased during flow modifications. For example:
>
> Datapath deletion:
>
>   ovs_lock();
>   table = rcu_dereference_protected(dp->table, ...);
>   rcu_assign_pointer(dp->table, NULL);
>   ovs_flow_tbl_put(table);
>   ovs_unlock();

I guess it's now possible for flow operations to succeed on
'removed-but-not-yet-freed' tables.  That's probably worth documenting
somewhere, since it is a slight behavior change.  More below.

> Flow modification:
>
>   rcu_read_lock();
>   dp = get_dp(...);
>   table = rcu_dereference(dp->table);
>   ovs_flow_tbl_get(table);
>   rcu_read_unlock();
>
>   mutex_lock(&table->lock);
>   /* Perform modifications on the flow_table */
>   mutex_unlock(&table->lock);
>   ovs_flow_tbl_put(table);
>
> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
> ---
> v2: Fix argument in ovs_flow_tbl_put (sparse)
>     Remove rcu checks in ovs_dp_masks_rebalance
> ---
>  net/openvswitch/datapath.c   | 285 ++++++++++++++++++++++++-----------
>  net/openvswitch/datapath.h   |   2 +-
>  net/openvswitch/flow.c       |  13 +-
>  net/openvswitch/flow.h       |   9 +-
>  net/openvswitch/flow_table.c | 180 ++++++++++++++--------
>  net/openvswitch/flow_table.h |  51 ++++++-
>  6 files changed, 380 insertions(+), 160 deletions(-)
>
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index e209099218b4..9c234993520c 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -88,13 +88,17 @@ static void ovs_notify(struct genl_family *family,
>   * DOC: Locking:
>   *
>   * All writes e.g. Writes to device state (add/remove datapath, port, set
> - * operations on vports, etc.), Writes to other state (flow table
> - * modifications, set miscellaneous datapath parameters, etc.) are protected
> - * by ovs_lock.
> + * operations on vports, etc.) and writes to other datapath parameters
> + * are protected by ovs_lock.
> + *
> + * Writes to the flow table are NOT protected by ovs_lock. Instead, a per-table
> + * mutex and reference count are used (see comment above "struct flow_table"
> + * definition). On some few occasions, the per-flow table mutex is nested
> + * inside ovs_mutex.
>   *
>   * Reads are protected by RCU.
>   *
> - * There are a few special cases (mostly stats) that have their own
> + * There are a few other special cases (mostly stats) that have their own
>   * synchronization but they nest under all of above and don't interact with
>   * each other.
>   *
> @@ -166,7 +170,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
>  {
>  	struct datapath *dp = container_of(rcu, struct datapath, rcu);
>  
> -	ovs_flow_tbl_destroy(&dp->table);
>  	free_percpu(dp->stats_percpu);
>  	kfree(dp->ports);
>  	ovs_meters_exit(dp);
> @@ -247,6 +250,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>  	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
>  	const struct vport *p = OVS_CB(skb)->input_vport;
>  	struct datapath *dp = p->dp;
> +	struct flow_table *table;
>  	struct sw_flow *flow;
>  	struct sw_flow_actions *sf_acts;
>  	struct dp_stats_percpu *stats;
> @@ -257,9 +261,16 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>  	int error;
>  
>  	stats = this_cpu_ptr(dp->stats_percpu);
> +	table = rcu_dereference(dp->table);
> +	if (!table) {
> +		net_dbg_ratelimited("ovs: no flow table on datapath %s\n",
> +				    ovs_dp_name(dp));
> +		kfree_skb(skb);
> +		return;
> +	}
>  
>  	/* Look up flow. */
> -	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
> +	flow = ovs_flow_tbl_lookup_stats(table, key, skb_get_hash(skb),
>  					 &n_mask_hit, &n_cache_hit);
>  	if (unlikely(!flow)) {
>  		struct dp_upcall_info upcall;
> @@ -752,12 +763,16 @@ static struct genl_family dp_packet_genl_family __ro_after_init = {
>  static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
>  			 struct ovs_dp_megaflow_stats *mega_stats)
>  {
> +	struct flow_table *table = ovsl_dereference(dp->table);
>  	int i;
>  
>  	memset(mega_stats, 0, sizeof(*mega_stats));
>  
> -	stats->n_flows = ovs_flow_tbl_count(&dp->table);
> -	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
> +	if (table) {
> +		stats->n_flows = ovs_flow_tbl_count(table);

Previously, when calling this we'd be under the ovs_mutex, and the read
of table->count would be somewhat coherent (for some definition of
coherent).  But we are now doing a bare read.  I'm not sure whether we
should take the lock here, or at least annotate the access (READ_ONCE()
here, and WRITE_ONCE() at the sites that update the count).  WDYT?

> +		mega_stats->n_masks = ovs_flow_tbl_num_masks(table);
> +	}
> +
>  
>  	stats->n_hit = stats->n_missed = stats->n_lost = 0;
>  
> @@ -829,15 +844,16 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
>  		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
>  }
>  
> -/* Called with ovs_mutex or RCU read lock. */
> +/* Called with table->lock or RCU read lock. */
>  static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
> +				   const struct flow_table *table,
>  				   struct sk_buff *skb)
>  {
>  	struct ovs_flow_stats stats;
>  	__be16 tcp_flags;
>  	unsigned long used;
>  
> -	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
> +	ovs_flow_stats_get(flow, table, &stats, &used, &tcp_flags);
>  
>  	if (used &&
>  	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
> @@ -857,8 +873,9 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
>  	return 0;
>  }
>  
> -/* Called with ovs_mutex or RCU read lock. */
> +/* Called with RCU read lock or table->lock held. */
>  static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
> +				     const struct flow_table *table,
>  				     struct sk_buff *skb, int skb_orig_len)
>  {
>  	struct nlattr *start;
> @@ -878,7 +895,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
>  	if (start) {
>  		const struct sw_flow_actions *sf_acts;
>  
> -		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
> +		sf_acts = rcu_dereference_ovs_tbl(flow->sf_acts, table);
>  		err = ovs_nla_put_actions(sf_acts->actions,
>  					  sf_acts->actions_len, skb);
>  
> @@ -897,8 +914,10 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
>  	return 0;
>  }
>  
> -/* Called with ovs_mutex or RCU read lock. */
> -static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
> +/* Called with table->lock or RCU read lock. */
> +static int ovs_flow_cmd_fill_info(const struct sw_flow *flow,
> +				  const struct flow_table *table,
> +				  int dp_ifindex,
>  				  struct sk_buff *skb, u32 portid,
>  				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
>  {
> @@ -929,12 +948,12 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
>  			goto error;
>  	}
>  
> -	err = ovs_flow_cmd_fill_stats(flow, skb);
> +	err = ovs_flow_cmd_fill_stats(flow, table, skb);
>  	if (err)
>  		goto error;
>  
>  	if (should_fill_actions(ufid_flags)) {
> -		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
> +		err = ovs_flow_cmd_fill_actions(flow, table, skb, skb_orig_len);
>  		if (err)
>  			goto error;
>  	}
> @@ -968,8 +987,9 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
>  	return skb;
>  }
>  
> -/* Called with ovs_mutex. */
> +/* Called with table->lock. */
>  static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
> +					       const struct flow_table *table,
>  					       int dp_ifindex,
>  					       struct genl_info *info, u8 cmd,
>  					       bool always, u32 ufid_flags)
> @@ -977,12 +997,12 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
>  	struct sk_buff *skb;
>  	int retval;
>  
> -	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
> +	skb = ovs_flow_cmd_alloc_info(ovs_tbl_dereference(flow->sf_acts, table),
>  				      &flow->id, info, always, ufid_flags);
>  	if (IS_ERR_OR_NULL(skb))
>  		return skb;
>  
> -	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
> +	retval = ovs_flow_cmd_fill_info(flow, table, dp_ifindex, skb,
>  					info->snd_portid, info->snd_seq, 0,
>  					cmd, ufid_flags);
>  	if (WARN_ON_ONCE(retval < 0)) {
> @@ -998,6 +1018,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
>  	struct sw_flow *flow = NULL, *new_flow;
> +	struct flow_table *table;
>  	struct sw_flow_mask mask;
>  	struct sk_buff *reply;
>  	struct datapath *dp;
> @@ -1064,30 +1085,43 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  		goto err_kfree_acts;
>  	}
>  

I think this can lead to a weird(?) behavior:

thread A (dp_destroy):                   thread b (ovs_flow_cmd_new):
rcu_assign_pointer(dp->table, NULL)
                                         rcu_read_lock();
                                         table =
                                         rcu_dereference(dp->table);
                                           [old table]
                                         ovs_flow_tbl_get(table)
                                             //refcnt change
                                         rcu_read_unlock()
ovs_flow_tbl_put(table) // refcnt chg
                                         mutex_lock(table->lock)
                                         ovs_flow_tbl_insert(...)
                                         [success reply]
                                         mutex_unlock(table->lock)
                                         ovs_flow_tbl_put(table)
                                         // table flow flush, etc.

I guess it isn't a huge deal (installing a flow while deleting the table
would be weird from a userspace perspective), and I think it is safe, but
it is worth mentioning that we can have such a scenario now.

> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(net, ovs_header->dp_ifindex);
>  	if (unlikely(!dp)) {
>  		error = -ENODEV;
> -		goto err_unlock_ovs;
> +		rcu_read_unlock();
> +		goto err_kfree_reply;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		error = -ENODEV;
> +		rcu_read_unlock();
> +		goto err_kfree_reply;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
>  
>  	/* Check if this is a duplicate flow */
>  	if (ovs_identifier_is_ufid(&new_flow->id))
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &new_flow->id);
>  	if (!flow)
> -		flow = ovs_flow_tbl_lookup(&dp->table, key);
> +		flow = ovs_flow_tbl_lookup(table, key);
>  	if (likely(!flow)) {
>  		rcu_assign_pointer(new_flow->sf_acts, acts);
>  
>  		/* Put flow in bucket. */
> -		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
> +		error = ovs_flow_tbl_insert(table, new_flow, &mask);
>  		if (unlikely(error)) {
>  			acts = NULL;
> -			goto err_unlock_ovs;
> +			goto err_unlock_tbl;
>  		}
>  
>  		if (unlikely(reply)) {
> -			error = ovs_flow_cmd_fill_info(new_flow,
> +			error = ovs_flow_cmd_fill_info(new_flow, table,
>  						       ovs_header->dp_ifindex,
>  						       reply, info->snd_portid,
>  						       info->snd_seq, 0,
> @@ -1095,7 +1129,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  						       ufid_flags);
>  			BUG_ON(error < 0);
>  		}
> -		ovs_unlock();
> +		mutex_unlock(&table->lock);
> +		ovs_flow_tbl_put(table);
>  	} else {
>  		struct sw_flow_actions *old_acts;
>  
> @@ -1108,28 +1143,28 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
>  							 | NLM_F_EXCL))) {
>  			error = -EEXIST;
> -			goto err_unlock_ovs;
> +			goto err_unlock_tbl;
>  		}
>  		/* The flow identifier has to be the same for flow updates.
>  		 * Look for any overlapping flow.
>  		 */
>  		if (unlikely(!ovs_flow_cmp(flow, &match))) {
>  			if (ovs_identifier_is_key(&flow->id))
> -				flow = ovs_flow_tbl_lookup_exact(&dp->table,
> +				flow = ovs_flow_tbl_lookup_exact(table,
>  								 &match);
>  			else /* UFID matches but key is different */
>  				flow = NULL;
>  			if (!flow) {
>  				error = -ENOENT;
> -				goto err_unlock_ovs;
> +				goto err_unlock_tbl;
>  			}
>  		}
>  		/* Update actions. */
> -		old_acts = ovsl_dereference(flow->sf_acts);
> +		old_acts = ovs_tbl_dereference(flow->sf_acts, table);
>  		rcu_assign_pointer(flow->sf_acts, acts);
>  
>  		if (unlikely(reply)) {
> -			error = ovs_flow_cmd_fill_info(flow,
> +			error = ovs_flow_cmd_fill_info(flow, table,
>  						       ovs_header->dp_ifindex,
>  						       reply, info->snd_portid,
>  						       info->snd_seq, 0,
> @@ -1137,7 +1172,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  						       ufid_flags);
>  			BUG_ON(error < 0);
>  		}
> -		ovs_unlock();
> +		mutex_unlock(&table->lock);
> +		ovs_flow_tbl_put(table);
>  
>  		ovs_nla_free_flow_actions_rcu(old_acts);
>  		ovs_flow_free(new_flow, false);
> @@ -1149,8 +1185,10 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	kfree(key);
>  	return 0;
>  
> -err_unlock_ovs:
> -	ovs_unlock();
> +err_unlock_tbl:
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
> +err_kfree_reply:
>  	kfree_skb(reply);
>  err_kfree_acts:
>  	ovs_nla_free_flow_actions(acts);
> @@ -1244,6 +1282,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  	struct net *net = sock_net(skb->sk);
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
> +	struct flow_table *table;
>  	struct sw_flow_key key;
>  	struct sw_flow *flow;
>  	struct sk_buff *reply = NULL;
> @@ -1278,29 +1317,43 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  		}
>  	}
>  
> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(net, ovs_header->dp_ifindex);
>  	if (unlikely(!dp)) {
>  		error = -ENODEV;
> -		goto err_unlock_ovs;
> +		rcu_read_unlock();
> +		goto err_free_reply;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		rcu_read_unlock();
> +		error = -ENODEV;
> +		goto err_free_reply;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
> +
>  	/* Check that the flow exists. */
>  	if (ufid_present)
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &sfid);
>  	else
> -		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
> +		flow = ovs_flow_tbl_lookup_exact(table, &match);
>  	if (unlikely(!flow)) {
>  		error = -ENOENT;
> -		goto err_unlock_ovs;
> +		goto err_unlock_tbl;
>  	}
>  
>  	/* Update actions, if present. */
>  	if (likely(acts)) {
> -		old_acts = ovsl_dereference(flow->sf_acts);
> +		old_acts = ovs_tbl_dereference(flow->sf_acts, table);
>  		rcu_assign_pointer(flow->sf_acts, acts);
>  
>  		if (unlikely(reply)) {
> -			error = ovs_flow_cmd_fill_info(flow,
> +			error = ovs_flow_cmd_fill_info(flow, table,
>  						       ovs_header->dp_ifindex,
>  						       reply, info->snd_portid,
>  						       info->snd_seq, 0,
> @@ -1310,20 +1363,22 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  		}
>  	} else {
>  		/* Could not alloc without acts before locking. */
> -		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
> +		reply = ovs_flow_cmd_build_info(flow, table,
> +						ovs_header->dp_ifindex,
>  						info, OVS_FLOW_CMD_SET, false,
>  						ufid_flags);
>  
>  		if (IS_ERR(reply)) {
>  			error = PTR_ERR(reply);
> -			goto err_unlock_ovs;
> +			goto err_unlock_tbl;
>  		}
>  	}
>  
>  	/* Clear stats. */
>  	if (a[OVS_FLOW_ATTR_CLEAR])
> -		ovs_flow_stats_clear(flow);
> -	ovs_unlock();
> +		ovs_flow_stats_clear(flow, table);
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  
>  	if (reply)
>  		ovs_notify(&dp_flow_genl_family, reply, info);
> @@ -1332,8 +1387,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>  
>  	return 0;
>  
> -err_unlock_ovs:
> -	ovs_unlock();
> +err_unlock_tbl:
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
> +err_free_reply:
>  	kfree_skb(reply);
>  err_kfree_acts:
>  	ovs_nla_free_flow_actions(acts);
> @@ -1346,6 +1403,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
>  	struct net *net = sock_net(skb->sk);
> +	struct flow_table *table;
>  	struct sw_flow_key key;
>  	struct sk_buff *reply;
>  	struct sw_flow *flow;
> @@ -1370,33 +1428,48 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
>  	if (err)
>  		return err;
>  
> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
>  	if (!dp) {
> -		err = -ENODEV;
> -		goto unlock;
> +		rcu_read_unlock();
> +		return -ENODEV;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		rcu_read_unlock();
> +		return -ENODEV;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
> +
>  
>  	if (ufid_present)
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &ufid);
>  	else
> -		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
> +		flow = ovs_flow_tbl_lookup_exact(table, &match);
>  	if (!flow) {
>  		err = -ENOENT;
>  		goto unlock;
>  	}
>  
> -	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
> -					OVS_FLOW_CMD_GET, true, ufid_flags);
> +	reply = ovs_flow_cmd_build_info(flow, table, ovs_header->dp_ifindex,
> +					info, OVS_FLOW_CMD_GET, true,
> +					ufid_flags);
>  	if (IS_ERR(reply)) {
>  		err = PTR_ERR(reply);
>  		goto unlock;
>  	}
>  
> -	ovs_unlock();
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  	return genlmsg_reply(reply, info);
>  unlock:
> -	ovs_unlock();
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  	return err;
>  }
>  
> @@ -1405,6 +1478,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
>  	struct nlattr **a = info->attrs;
>  	struct ovs_header *ovs_header = genl_info_userhdr(info);
>  	struct net *net = sock_net(skb->sk);
> +	struct flow_table *table;
>  	struct sw_flow_key key;
>  	struct sk_buff *reply;
>  	struct sw_flow *flow = NULL;
> @@ -1425,36 +1499,49 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
>  			return err;
>  	}
>  
> -	ovs_lock();
> +	rcu_read_lock();
>  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
>  	if (unlikely(!dp)) {
> -		err = -ENODEV;
> -		goto unlock;
> +		rcu_read_unlock();
> +		return -ENODEV;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table || !ovs_flow_tbl_get(table)) {
> +		rcu_read_unlock();
> +		return -ENODEV;
> +	}
> +	rcu_read_unlock();
> +
> +	/* It is safe to dereference "table" after leaving rcu read-protected
> +	 * region because it's pinned by refcount.
> +	 */
> +	mutex_lock(&table->lock);
> +
>  
>  	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
> -		err = ovs_flow_tbl_flush(&dp->table);
> +		err = ovs_flow_tbl_flush(table);
>  		goto unlock;
>  	}
>  
>  	if (ufid_present)
> -		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
> +		flow = ovs_flow_tbl_lookup_ufid(table, &ufid);
>  	else
> -		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
> +		flow = ovs_flow_tbl_lookup_exact(table, &match);
>  	if (unlikely(!flow)) {
>  		err = -ENOENT;
>  		goto unlock;
>  	}
>  
> -	ovs_flow_tbl_remove(&dp->table, flow);
> -	ovs_unlock();
> +	ovs_flow_tbl_remove(table, flow);
> +	mutex_unlock(&table->lock);
>  
>  	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
>  					&flow->id, info, false, ufid_flags);
>  	if (likely(reply)) {
>  		if (!IS_ERR(reply)) {
>  			rcu_read_lock();	/*To keep RCU checker happy. */
> -			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
> +			err = ovs_flow_cmd_fill_info(flow, table,
> +						     ovs_header->dp_ifindex,
>  						     reply, info->snd_portid,
>  						     info->snd_seq, 0,
>  						     OVS_FLOW_CMD_DEL,
> @@ -1473,10 +1560,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
>  	}
>  
>  out_free:
> +	ovs_flow_tbl_put(table);
>  	ovs_flow_free(flow, true);
>  	return 0;
>  unlock:
> -	ovs_unlock();
> +	mutex_unlock(&table->lock);
> +	ovs_flow_tbl_put(table);
>  	return err;
>  }
>  
> @@ -1485,6 +1574,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
>  	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
>  	struct table_instance *ti;
> +	struct flow_table *table;
>  	struct datapath *dp;
>  	u32 ufid_flags;
>  	int err;
> @@ -1501,8 +1591,13 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  		rcu_read_unlock();
>  		return -ENODEV;
>  	}
> +	table = rcu_dereference(dp->table);
> +	if (!table) {
> +		rcu_read_unlock();
> +		return -ENODEV;
> +	}
>  
> -	ti = rcu_dereference(dp->table.ti);
> +	ti = rcu_dereference(table->ti);
>  	for (;;) {
>  		struct sw_flow *flow;
>  		u32 bucket, obj;
> @@ -1513,8 +1608,8 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  		if (!flow)
>  			break;
>  
> -		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
> -					   NETLINK_CB(cb->skb).portid,
> +		if (ovs_flow_cmd_fill_info(flow, table, ovs_header->dp_ifindex,
> +					   skb, NETLINK_CB(cb->skb).portid,
>  					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
>  					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
>  			break;
> @@ -1598,8 +1693,13 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
>  	struct ovs_dp_stats dp_stats;
>  	struct ovs_dp_megaflow_stats dp_megaflow_stats;
>  	struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
> +	struct flow_table *table;
>  	int err, pids_len;
>  
> +	table = ovsl_dereference(dp->table);
> +	if (!table)
> +		return -ENODEV;
> +
>  	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
>  				 flags, cmd);
>  	if (!ovs_header)
> @@ -1625,7 +1725,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
>  		goto nla_put_failure;
>  
>  	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
> -			ovs_flow_tbl_masks_cache_size(&dp->table)))
> +			ovs_flow_tbl_masks_cache_size(table)))
>  		goto nla_put_failure;
>  
>  	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
> @@ -1736,6 +1836,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
>  static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
>  {
>  	u32 user_features = 0, old_features = dp->user_features;
> +	struct flow_table *table;
>  	int err;
>  
>  	if (a[OVS_DP_ATTR_USER_FEATURES]) {
> @@ -1757,8 +1858,12 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
>  		int err;
>  		u32 cache_size;
>  
> +		table = ovsl_dereference(dp->table);
> +		if (!table)
> +			return -ENODEV;
> +
>  		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
> -		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
> +		err = ovs_flow_tbl_masks_cache_resize(table, cache_size);
>  		if (err)
>  			return err;
>  	}
> @@ -1810,6 +1915,7 @@ static int ovs_dp_vport_init(struct datapath *dp)
>  static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  {
>  	struct nlattr **a = info->attrs;
> +	struct flow_table *table;
>  	struct vport_parms parms;
>  	struct sk_buff *reply;
>  	struct datapath *dp;
> @@ -1833,9 +1939,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	ovs_dp_set_net(dp, sock_net(skb->sk));
>  
>  	/* Allocate table. */
> -	err = ovs_flow_tbl_init(&dp->table);
> -	if (err)
> +	table = ovs_flow_tbl_alloc();
> +	if (IS_ERR(table)) {
> +		err = PTR_ERR(table);
>  		goto err_destroy_dp;
> +	}
> +	rcu_assign_pointer(dp->table, table);
>  
>  	err = ovs_dp_stats_init(dp);
>  	if (err)
> @@ -1905,7 +2014,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  err_destroy_stats:
>  	free_percpu(dp->stats_percpu);
>  err_destroy_table:
> -	ovs_flow_tbl_destroy(&dp->table);
> +	ovs_flow_tbl_put(table);
>  err_destroy_dp:
>  	kfree(dp);
>  err_destroy_reply:
> @@ -1917,7 +2026,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  /* Called with ovs_mutex. */
>  static void __dp_destroy(struct datapath *dp)
>  {
> -	struct flow_table *table = &dp->table;
> +	struct flow_table *table = rcu_dereference_protected(dp->table,
> +					lockdep_ovsl_is_held());
>  	int i;
>  
>  	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
> @@ -1939,14 +2049,10 @@ static void __dp_destroy(struct datapath *dp)
>  	 */
>  	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
>  
> -	/* Flush sw_flow in the tables. RCU cb only releases resource
> -	 * such as dp, ports and tables. That may avoid some issues
> -	 * such as RCU usage warning.
> -	 */
> -	table_instance_flow_flush(table, ovsl_dereference(table->ti),
> -				  ovsl_dereference(table->ufid_ti));
> +	rcu_assign_pointer(dp->table, NULL);
> +	ovs_flow_tbl_put(table);
>  
> -	/* RCU destroy the ports, meters and flow tables. */
> +	/* RCU destroy the ports and meters. */
>  	call_rcu(&dp->rcu, destroy_dp_rcu);
>  }
>  
> @@ -2554,13 +2660,18 @@ static void ovs_dp_masks_rebalance(struct work_struct *work)
>  {
>  	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
>  					       masks_rebalance.work);
> +	struct flow_table *table;
>  	struct datapath *dp;
>  
>  	ovs_lock();
> -
> -	list_for_each_entry(dp, &ovs_net->dps, list_node)
> -		ovs_flow_masks_rebalance(&dp->table);
> -
> +	list_for_each_entry(dp, &ovs_net->dps, list_node) {
> +		table = ovsl_dereference(dp->table);
> +		if (!table)
> +			continue;

Should we take a reference on the table here?  I guess it's kind of safe
because of the ovs_lock() above, but if that ever gets removed, someone
could easily miss that there isn't a refcnt pin here (whereas everywhere
else has an ovs_flow_tbl_get() before it).

> +		mutex_lock(&table->lock);
> +		ovs_flow_masks_rebalance(table);
> +		mutex_unlock(&table->lock);
> +	}
>  	ovs_unlock();
>  
>  	schedule_delayed_work(&ovs_net->masks_rebalance,
> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> index db0c3e69d66c..44773bf9f645 100644
> --- a/net/openvswitch/datapath.h
> +++ b/net/openvswitch/datapath.h
> @@ -90,7 +90,7 @@ struct datapath {
>  	struct list_head list_node;
>  
>  	/* Flow table. */
> -	struct flow_table table;
> +	struct flow_table __rcu *table;
>  
>  	/* Switch ports. */
>  	struct hlist_head *ports;
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index 66366982f604..0a748cf20f53 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -124,8 +124,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
>  	spin_unlock(&stats->lock);
>  }
>  
> -/* Must be called with rcu_read_lock or ovs_mutex. */
> +/* Must be called with rcu_read_lock or table->lock held. */
>  void ovs_flow_stats_get(const struct sw_flow *flow,
> +			const struct flow_table *table,
>  			struct ovs_flow_stats *ovs_stats,
>  			unsigned long *used, __be16 *tcp_flags)
>  {
> @@ -136,7 +137,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
>  	memset(ovs_stats, 0, sizeof(*ovs_stats));
>  
>  	for_each_cpu(cpu, flow->cpu_used_mask) {
> -		struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
> +		struct sw_flow_stats *stats =
> +			rcu_dereference_ovs_tbl(flow->stats[cpu], table);
>  
>  		if (stats) {
>  			/* Local CPU may write on non-local stats, so we must
> @@ -153,13 +155,14 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
>  	}
>  }
>  
> -/* Called with ovs_mutex. */
> -void ovs_flow_stats_clear(struct sw_flow *flow)
> +/* Called with table->lock held. */
> +void ovs_flow_stats_clear(struct sw_flow *flow, struct flow_table *table)
>  {
>  	unsigned int cpu;
>  
>  	for_each_cpu(cpu, flow->cpu_used_mask) {
> -		struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
> +		struct sw_flow_stats *stats =
> +			ovs_tbl_dereference(flow->stats[cpu], table);
>  
>  		if (stats) {
>  			spin_lock_bh(&stats->lock);
> diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
> index b5711aff6e76..e05ed6796e4e 100644
> --- a/net/openvswitch/flow.h
> +++ b/net/openvswitch/flow.h
> @@ -23,6 +23,7 @@
>  #include <net/dst_metadata.h>
>  #include <net/nsh.h>
>  
> +struct flow_table;
>  struct sk_buff;
>  
>  enum sw_flow_mac_proto {
> @@ -280,9 +281,11 @@ static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
>  
>  void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
>  			   const struct sk_buff *);
> -void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
> -			unsigned long *used, __be16 *tcp_flags);
> -void ovs_flow_stats_clear(struct sw_flow *);
> +void ovs_flow_stats_get(const struct sw_flow *flow,
> +			const struct flow_table *table,
> +			struct ovs_flow_stats *stats, unsigned long *used,
> +			__be16 *tcp_flags);
> +void ovs_flow_stats_clear(struct sw_flow *flow, struct flow_table *table);
>  u64 ovs_flow_used_time(unsigned long flow_jiffies);
>  
>  int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index 61c6a5f77c2e..d9dbe4b4807c 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -45,6 +45,16 @@
>  static struct kmem_cache *flow_cache;
>  struct kmem_cache *flow_stats_cache __read_mostly;
>  
> +#ifdef CONFIG_LOCKDEP
> +int lockdep_ovs_tbl_is_held(const struct flow_table *table)
> +{
> +	if (debug_locks)
> +		return lockdep_is_held(&table->lock);
> +	else
> +		return 1;
> +}
> +#endif
> +
>  static u16 range_n_bytes(const struct sw_flow_key_range *range)
>  {
>  	return range->end - range->start;
> @@ -249,12 +259,12 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>  	if (!new)
>  		return -ENOMEM;
>  
> -	old = ovsl_dereference(tbl->mask_array);
> +	old = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	if (old) {
>  		int i;
>  
>  		for (i = 0; i < old->max; i++) {
> -			if (ovsl_dereference(old->masks[i]))
> +			if (ovs_tbl_dereference(old->masks[i], tbl))
>  				new->masks[new->count++] = old->masks[i];
>  		}
>  		call_rcu(&old->rcu, mask_array_rcu_cb);
> @@ -268,7 +278,7 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>  static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  				   struct sw_flow_mask *new)
>  {
> -	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +	struct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	int err, ma_count = READ_ONCE(ma->count);
>  
>  	if (ma_count >= ma->max) {
> @@ -277,7 +287,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  		if (err)
>  			return err;
>  
> -		ma = ovsl_dereference(tbl->mask_array);
> +		ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	} else {
>  		/* On every add or delete we need to reset the counters so
>  		 * every new mask gets a fair chance of being prioritized.
> @@ -285,7 +295,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  		tbl_mask_array_reset_counters(ma);
>  	}
>  
> -	BUG_ON(ovsl_dereference(ma->masks[ma_count]));
> +	WARN_ON_ONCE(ovs_tbl_dereference(ma->masks[ma_count], tbl));
>  
>  	rcu_assign_pointer(ma->masks[ma_count], new);
>  	WRITE_ONCE(ma->count, ma_count + 1);
> @@ -296,12 +306,12 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
>  static void tbl_mask_array_del_mask(struct flow_table *tbl,
>  				    struct sw_flow_mask *mask)
>  {
> -	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +	struct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	int i, ma_count = READ_ONCE(ma->count);
>  
>  	/* Remove the deleted mask pointers from the array */
>  	for (i = 0; i < ma_count; i++) {
> -		if (mask == ovsl_dereference(ma->masks[i]))
> +		if (mask == ovs_tbl_dereference(ma->masks[i], tbl))
>  			goto found;
>  	}
>  
> @@ -329,10 +339,10 @@ static void tbl_mask_array_del_mask(struct flow_table *tbl,
>  static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>  {
>  	if (mask) {
> -		/* ovs-lock is required to protect mask-refcount and
> +		/* table lock is required to protect mask-refcount and
>  		 * mask list.
>  		 */
> -		ASSERT_OVSL();
> +		ASSERT_OVS_TBL(tbl);
>  		BUG_ON(!mask->ref_count);
>  		mask->ref_count--;
>  
> @@ -386,7 +396,8 @@ static struct mask_cache *tbl_mask_cache_alloc(u32 size)
>  }
>  int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>  {
> -	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
> +	struct mask_cache *mc = rcu_dereference_ovs_tbl(table->mask_cache,
> +							table);
>  	struct mask_cache *new;
>  
>  	if (size == mc->cache_size)
> @@ -406,15 +417,23 @@ int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
>  	return 0;
>  }
>  
> -int ovs_flow_tbl_init(struct flow_table *table)
> +struct flow_table *ovs_flow_tbl_alloc(void)
>  {
>  	struct table_instance *ti, *ufid_ti;
> +	struct flow_table *table;
>  	struct mask_cache *mc;
>  	struct mask_array *ma;
>  
> +	table = kzalloc_obj(*table, GFP_KERNEL);
> +	if (!table)
> +		return ERR_PTR(-ENOMEM);
> +
> +	mutex_init(&table->lock);
> +	refcount_set(&table->refcnt, 1);
> +
>  	mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
>  	if (!mc)
> -		return -ENOMEM;
> +		goto free_table;
>  
>  	ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
>  	if (!ma)
> @@ -435,7 +454,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>  	table->last_rehash = jiffies;
>  	table->count = 0;
>  	table->ufid_count = 0;
> -	return 0;
> +	return table;
>  
>  free_ti:
>  	__table_instance_destroy(ti);
> @@ -443,7 +462,10 @@ int ovs_flow_tbl_init(struct flow_table *table)
>  	__mask_array_destroy(ma);
>  free_mask_cache:
>  	__mask_cache_destroy(mc);
> -	return -ENOMEM;
> +free_table:
> +	mutex_destroy(&table->lock);
> +	kfree(table);
> +	return ERR_PTR(-ENOMEM);
>  }
>  
>  static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
> @@ -470,7 +492,7 @@ static void table_instance_flow_free(struct flow_table *table,
>  	flow_mask_remove(table, flow->mask);
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  void table_instance_flow_flush(struct flow_table *table,
>  			       struct table_instance *ti,
>  			       struct table_instance *ufid_ti)
> @@ -505,11 +527,11 @@ static void table_instance_destroy(struct table_instance *ti,
>  	call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
>  }
>  
> -/* No need for locking this function is called from RCU callback or
> - * error path.
> - */
> -void ovs_flow_tbl_destroy(struct flow_table *table)
> +/* No need for locking this function is called from RCU callback. */
> +static void ovs_flow_tbl_destroy_rcu(struct rcu_head *rcu)
>  {
> +	struct flow_table *table = container_of(rcu, struct flow_table, rcu);
> +
>  	struct table_instance *ti = rcu_dereference_raw(table->ti);
>  	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
>  	struct mask_cache *mc = rcu_dereference_raw(table->mask_cache);
> @@ -518,6 +540,20 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
>  	call_rcu(&mc->rcu, mask_cache_rcu_cb);
>  	call_rcu(&ma->rcu, mask_array_rcu_cb);
>  	table_instance_destroy(ti, ufid_ti);
> +	mutex_destroy(&table->lock);
> +	kfree(table);
> +}
> +
> +void ovs_flow_tbl_put(struct flow_table *table)
> +{
> +	if (refcount_dec_and_test(&table->refcnt)) {
> +		mutex_lock(&table->lock);
> +		table_instance_flow_flush(table,
> +					  ovs_tbl_dereference(table->ti, table),
> +					  ovs_tbl_dereference(table->ufid_ti, table));
> +		mutex_unlock(&table->lock);
> +		call_rcu(&table->rcu, ovs_flow_tbl_destroy_rcu);
> +	}
>  }
>  
>  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
> @@ -571,7 +607,8 @@ static void ufid_table_instance_insert(struct table_instance *ti,
>  	hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
>  }
>  
> -static void flow_table_copy_flows(struct table_instance *old,
> +static void flow_table_copy_flows(struct flow_table *table,
> +				  struct table_instance *old,
>  				  struct table_instance *new, bool ufid)
>  {
>  	int old_ver;
> @@ -588,17 +625,18 @@ static void flow_table_copy_flows(struct table_instance *old,
>  		if (ufid)
>  			hlist_for_each_entry_rcu(flow, head,
>  						 ufid_table.node[old_ver],
> -						 lockdep_ovsl_is_held())
> +						 lockdep_ovs_tbl_is_held(table))
>  				ufid_table_instance_insert(new, flow);
>  		else
>  			hlist_for_each_entry_rcu(flow, head,
>  						 flow_table.node[old_ver],
> -						 lockdep_ovsl_is_held())
> +						 lockdep_ovs_tbl_is_held(table))
>  				table_instance_insert(new, flow);
>  	}
>  }
>  
> -static struct table_instance *table_instance_rehash(struct table_instance *ti,
> +static struct table_instance *table_instance_rehash(struct flow_table *table,
> +						    struct table_instance *ti,
>  						    int n_buckets, bool ufid)
>  {
>  	struct table_instance *new_ti;
> @@ -607,16 +645,19 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti,
>  	if (!new_ti)
>  		return NULL;
>  
> -	flow_table_copy_flows(ti, new_ti, ufid);
> +	flow_table_copy_flows(table, ti, new_ti, ufid);
>  
>  	return new_ti;
>  }
>  
> +/* Must be called with flow_table->lock held. */
>  int ovs_flow_tbl_flush(struct flow_table *flow_table)
>  {
>  	struct table_instance *old_ti, *new_ti;
>  	struct table_instance *old_ufid_ti, *new_ufid_ti;
>  
> +	ASSERT_OVS_TBL(flow_table);
> +
>  	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
>  	if (!new_ti)
>  		return -ENOMEM;
> @@ -624,8 +665,8 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
>  	if (!new_ufid_ti)
>  		goto err_free_ti;
>  
> -	old_ti = ovsl_dereference(flow_table->ti);
> -	old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
> +	old_ti = ovs_tbl_dereference(flow_table->ti, flow_table);
> +	old_ufid_ti = ovs_tbl_dereference(flow_table->ufid_ti, flow_table);
>  
>  	rcu_assign_pointer(flow_table->ti, new_ti);
>  	rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
> @@ -693,7 +734,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
>  	return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
>  }
>  
> -static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
> +static struct sw_flow *masked_flow_lookup(struct flow_table *tbl,
> +					  struct table_instance *ti,
>  					  const struct sw_flow_key *unmasked,
>  					  const struct sw_flow_mask *mask,
>  					  u32 *n_mask_hit)
> @@ -709,7 +751,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
>  	(*n_mask_hit)++;
>  
>  	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
> -				 lockdep_ovsl_is_held()) {
> +				 lockdep_ovs_tbl_is_held(tbl)) {
>  		if (flow->mask == mask && flow->flow_table.hash == hash &&
>  		    flow_cmp_masked_key(flow, &masked_key, &mask->range))
>  			return flow;
> @@ -736,9 +778,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>  	int i;
>  
>  	if (likely(*index < ma->max)) {
> -		mask = rcu_dereference_ovsl(ma->masks[*index]);
> +		mask = rcu_dereference_ovs_tbl(ma->masks[*index], tbl);
>  		if (mask) {
> -			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> +			flow = masked_flow_lookup(tbl, ti, key, mask, n_mask_hit);
>  			if (flow) {
>  				u64_stats_update_begin(&stats->syncp);
>  				stats->usage_cntrs[*index]++;
> @@ -754,11 +796,11 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>  		if (i == *index)
>  			continue;
>  
> -		mask = rcu_dereference_ovsl(ma->masks[i]);
> +		mask = rcu_dereference_ovs_tbl(ma->masks[i], tbl);
>  		if (unlikely(!mask))
>  			break;
>  
> -		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> +		flow = masked_flow_lookup(tbl, ti, key, mask, n_mask_hit);
>  		if (flow) { /* Found */
>  			*index = i;
>  			u64_stats_update_begin(&stats->syncp);
> @@ -845,8 +887,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>  struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
>  				    const struct sw_flow_key *key)
>  {
> -	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> -	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
> +	struct table_instance *ti = rcu_dereference_ovs_tbl(tbl->ti, tbl);
> +	struct mask_array *ma = rcu_dereference_ovs_tbl(tbl->mask_array, tbl);
>  	u32 __always_unused n_mask_hit;
>  	u32 __always_unused n_cache_hit;
>  	struct sw_flow *flow;
> @@ -865,21 +907,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
>  struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
>  					  const struct sw_flow_match *match)
>  {
> -	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +	struct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	int i;
>  
> -	/* Always called under ovs-mutex. */
> +	/* Always called under tbl->lock. */
>  	for (i = 0; i < ma->max; i++) {
> -		struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> +		struct table_instance *ti =
> +				rcu_dereference_ovs_tbl(tbl->ti, tbl);
>  		u32 __always_unused n_mask_hit;
>  		struct sw_flow_mask *mask;
>  		struct sw_flow *flow;
>  
> -		mask = ovsl_dereference(ma->masks[i]);
> +		mask = ovs_tbl_dereference(ma->masks[i], tbl);
>  		if (!mask)
>  			continue;
>  
> -		flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
> +		flow = masked_flow_lookup(tbl, ti, match->key, mask, &n_mask_hit);
>  		if (flow && ovs_identifier_is_key(&flow->id) &&
>  		    ovs_flow_cmp_unmasked_key(flow, match)) {
>  			return flow;
> @@ -915,7 +958,7 @@ bool ovs_flow_cmp(const struct sw_flow *flow,
>  struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  					 const struct sw_flow_id *ufid)
>  {
> -	struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
> +	struct table_instance *ti = rcu_dereference_ovs_tbl(tbl->ufid_ti, tbl);
>  	struct sw_flow *flow;
>  	struct hlist_head *head;
>  	u32 hash;
> @@ -923,7 +966,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  	hash = ufid_hash(ufid);
>  	head = find_bucket(ti, hash);
>  	hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
> -				 lockdep_ovsl_is_held()) {
> +				 lockdep_ovs_tbl_is_held(tbl)) {
>  		if (flow->ufid_table.hash == hash &&
>  		    ovs_flow_cmp_ufid(flow, ufid))
>  			return flow;
> @@ -933,28 +976,33 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  
>  int ovs_flow_tbl_num_masks(const struct flow_table *table)
>  {
> -	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
> +	struct mask_array *ma = rcu_dereference_ovs_tbl(table->mask_array,
> +							table);
>  	return READ_ONCE(ma->count);
>  }
>  
>  u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
>  {
> -	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);
> +	struct mask_cache *mc = rcu_dereference_ovs_tbl(table->mask_cache,
> +							table);
>  
>  	return READ_ONCE(mc->cache_size);
>  }
>  
> -static struct table_instance *table_instance_expand(struct table_instance *ti,
> +static struct table_instance *table_instance_expand(struct flow_table *table,
> +						    struct table_instance *ti,
>  						    bool ufid)
>  {
> -	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
> +	return table_instance_rehash(table, ti, ti->n_buckets * 2, ufid);
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
>  {
> -	struct table_instance *ti = ovsl_dereference(table->ti);
> -	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
> +	struct table_instance *ti = ovs_tbl_dereference(table->ti,
> +							table);
> +	struct table_instance *ufid_ti = ovs_tbl_dereference(table->ufid_ti,
> +							     table);
>  
>  	BUG_ON(table->count == 0);
>  	table_instance_flow_free(table, ti, ufid_ti, flow);
> @@ -988,10 +1036,10 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
>  	struct mask_array *ma;
>  	int i;
>  
> -	ma = ovsl_dereference(tbl->mask_array);
> +	ma = ovs_tbl_dereference(tbl->mask_array, tbl);
>  	for (i = 0; i < ma->max; i++) {
>  		struct sw_flow_mask *t;
> -		t = ovsl_dereference(ma->masks[i]);
> +		t = ovs_tbl_dereference(ma->masks[i], tbl);
>  
>  		if (t && mask_equal(mask, t))
>  			return t;
> @@ -1029,22 +1077,25 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>  	return 0;
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
>  {
>  	struct table_instance *new_ti = NULL;
>  	struct table_instance *ti;
>  
> +	ASSERT_OVS_TBL(table);
> +
>  	flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
> -	ti = ovsl_dereference(table->ti);
> +	ti = ovs_tbl_dereference(table->ti, table);
>  	table_instance_insert(ti, flow);
>  	table->count++;
>  
>  	/* Expand table, if necessary, to make room. */
>  	if (table->count > ti->n_buckets)
> -		new_ti = table_instance_expand(ti, false);
> +		new_ti = table_instance_expand(table, ti, false);
>  	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
> -		new_ti = table_instance_rehash(ti, ti->n_buckets, false);
> +		new_ti = table_instance_rehash(table, ti, ti->n_buckets,
> +					       false);
>  
>  	if (new_ti) {
>  		rcu_assign_pointer(table->ti, new_ti);
> @@ -1053,13 +1104,15 @@ static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
>  	}
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
>  {
>  	struct table_instance *ti;
>  
> +	ASSERT_OVS_TBL(table);
> +
>  	flow->ufid_table.hash = ufid_hash(&flow->id);
> -	ti = ovsl_dereference(table->ufid_ti);
> +	ti = ovs_tbl_dereference(table->ufid_ti, table);
>  	ufid_table_instance_insert(ti, flow);
>  	table->ufid_count++;
>  
> @@ -1067,7 +1120,7 @@ static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
>  	if (table->ufid_count > ti->n_buckets) {
>  		struct table_instance *new_ti;
>  
> -		new_ti = table_instance_expand(ti, true);
> +		new_ti = table_instance_expand(table, ti, true);
>  		if (new_ti) {
>  			rcu_assign_pointer(table->ufid_ti, new_ti);
>  			call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
> @@ -1075,12 +1128,14 @@ static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
>  	}
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table mutex held. */
>  int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
>  			const struct sw_flow_mask *mask)
>  {
>  	int err;
>  
> +	ASSERT_OVS_TBL(table);
> +
>  	err = flow_mask_insert(table, flow, mask);
>  	if (err)
>  		return err;
> @@ -1099,10 +1154,11 @@ static int compare_mask_and_count(const void *a, const void *b)
>  	return (s64)mc_b->counter - (s64)mc_a->counter;
>  }
>  
> -/* Must be called with OVS mutex held. */
> +/* Must be called with table->lock held. */
>  void ovs_flow_masks_rebalance(struct flow_table *table)
>  {
> -	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
> +	struct mask_array *ma = rcu_dereference_ovs_tbl(table->mask_array,
> +							table);
>  	struct mask_count *masks_and_count;
>  	struct mask_array *new;
>  	int masks_entries = 0;
> @@ -1117,7 +1173,7 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>  		struct sw_flow_mask *mask;
>  		int cpu;
>  
> -		mask = rcu_dereference_ovsl(ma->masks[i]);
> +		mask = rcu_dereference_ovs_tbl(ma->masks[i], table);
>  		if (unlikely(!mask))
>  			break;
>  
> @@ -1171,7 +1227,7 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>  	for (i = 0; i < masks_entries; i++) {
>  		int index = masks_and_count[i].index;
>  
> -		if (ovsl_dereference(ma->masks[index]))
> +		if (ovs_tbl_dereference(ma->masks[index], table))
>  			new->masks[new->count++] = ma->masks[index];
>  	}
>  
> diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
> index f524dc3e4862..cffd412c9045 100644
> --- a/net/openvswitch/flow_table.h
> +++ b/net/openvswitch/flow_table.h
> @@ -59,7 +59,29 @@ struct table_instance {
>  	u32 hash_seed;
>  };
>  
> +/* Locking:
> + *
> + * flow_table is _not_ protected by ovs_lock (see comment above ovs_mutex
> + * in datapath.c).
> + *
> + * All writes to flow_table are protected by the embedded "lock".
> + * In order to ensure datapath destruction does not trigger the destruction
> + * of the flow_table, "refcnt" is used. Therefore, writers must:
> + * 1 - Enter rcu read-protected section
> + * 2 - Increase "table->refcnt"
> + * 3 - Leave rcu read-protected section (to avoid using mutexes inside rcu)
> + * 4 - Lock "table->lock"
> + * 5 - Perform modifications
> + * 6 - Release "table->lock"
> + * 7 - Decrease "table->refcnt"
> + *
> + * Reads are protected by RCU.
> + */
>  struct flow_table {
> +	/* Locks flow table writes. */
> +	struct mutex lock;
> +	refcount_t refcnt;
> +	struct rcu_head rcu;
>  	struct table_instance __rcu *ti;
>  	struct table_instance __rcu *ufid_ti;
>  	struct mask_cache __rcu *mask_cache;
> @@ -71,15 +93,40 @@ struct flow_table {
>  
>  extern struct kmem_cache *flow_stats_cache;
>  
> +#ifdef CONFIG_LOCKDEP
> +int lockdep_ovs_tbl_is_held(const struct flow_table *table);
> +#else
> +static inline int lockdep_ovs_tbl_is_held(const struct flow_table *table)
> +{
> +	(void)table;
> +	return 1;
> +}
> +#endif
> +
> +#define ASSERT_OVS_TBL(tbl)   WARN_ON(!lockdep_ovs_tbl_is_held(tbl))
> +
> +/* Lock-protected update-allowed dereferences.*/
> +#define ovs_tbl_dereference(p, tbl)	\
> +	rcu_dereference_protected(p, lockdep_ovs_tbl_is_held(tbl))
> +
> +/* Read dereferences can be protected by either RCU, table lock or ovs_mutex. */
> +#define rcu_dereference_ovs_tbl(p, tbl) \
> +	rcu_dereference_check(p,		\
> +		lockdep_ovs_tbl_is_held(tbl) || lockdep_ovsl_is_held())
> +
>  int ovs_flow_init(void);
>  void ovs_flow_exit(void);
>  
>  struct sw_flow *ovs_flow_alloc(void);
>  void ovs_flow_free(struct sw_flow *, bool deferred);
>  
> -int ovs_flow_tbl_init(struct flow_table *);
> +struct flow_table *ovs_flow_tbl_alloc(void);
> +void ovs_flow_tbl_put(struct flow_table *table);
> +static inline bool ovs_flow_tbl_get(struct flow_table *table)
> +{
> +	return refcount_inc_not_zero(&table->refcnt);
> +}
>  int ovs_flow_tbl_count(const struct flow_table *table);
> -void ovs_flow_tbl_destroy(struct flow_table *table);
>  int ovs_flow_tbl_flush(struct flow_table *flow_table);
>  
>  int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,


^ permalink raw reply

* Re: [PATCH iwl-net 6/10] ice: check PHY autoneg capability before rejecting ethtool autoneg setting
From: Tony Nguyen @ 2026-04-10 18:58 UTC (permalink / raw)
  To: Aleksandr Loktionov, intel-wired-lan; +Cc: netdev, Jan Glaza
In-Reply-To: <20260403054029.3789616-7-aleksandr.loktionov@intel.com>



On 4/2/2026 10:40 PM, Aleksandr Loktionov wrote:
> ice_set_link_ksettings() rejects autoneg requests by comparing
> user settings against safe_ks which is populated by
> ice_phy_type_to_ethtool(). The Autoneg bit in safe_ks is set
> only if the current PHY configuration reports it supported,
> but this misses PHYs that support autoneg and have it available
> through PHY capabilities. Pull the autoneg flag from the actual
> PHY capabilities (already fetched earlier in the function) to
> ensure the user can toggle autoneg on any capable PHY.
> 
> Fixes: 5cd349c349d6 ("ice: report supported and advertised autoneg using PHY capabilities")
> Cc: stable@vger.kernel.org
> Signed-off-by: Jan Glaza <jan.glaza@intel.com>
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/ice_ethtool.c | 8 ++++++++
>   1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> index 49b9376..44483bc 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> @@ -2654,6 +2654,14 @@ ice_set_link_ksettings(struct net_device *netdev,
>   	/* Get link modes supported by hardware.*/
>   	ice_phy_type_to_ethtool(netdev, &safe_ks);
>   
> +	/* Pull the value of autoneg from phy caps to ensure we allow
> +	 * toggling it on all PHYs that support it.
> +	 */
> +	if (ice_is_phy_caps_an_enabled(phy_caps)) {
> +		ethtool_link_ksettings_add_link_mode(&safe_ks, supported, Autoneg);
> +		set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, safe_ks.link_modes.supported);

 From Sashiko:

This isn't a bug, but should this use 
ethtool_link_ksettings_add_link_mode() instead of calling set_bit() 
directly? Using set_bit() on the link modes breaks the ethtool interface 
abstraction.

Also, does this incorrectly couple the ETHTOOL_LINK_MODE_FEC_NONE_BIT 
support with Autonegotiation support? Forward Error Correction support 
is independent of Autonegotiation.

For PHYs lacking Autonegotiation, the FEC none bit will not be added to 
safe_ks.link_modes.supported. When a user requests settings via ethtool, 
copy_ks.link_modes.advertising will likely contain the FEC none bit 
since it is unconditionally returned by ice_get_link_ksettings().

> +	}
> +
>   	/* and check against modes requested by user.
>   	 * Return an error if unsupported mode was set.
>   	 */


^ permalink raw reply

* Re: [PATCH iwl-net 10/10] ice: allow setting min_tx_rate to 0 to resolve VF bandwidth oversubscription
From: Tony Nguyen @ 2026-04-10 18:58 UTC (permalink / raw)
  To: Aleksandr Loktionov, intel-wired-lan; +Cc: netdev
In-Reply-To: <20260403054029.3789616-11-aleksandr.loktionov@intel.com>



On 4/2/2026 10:40 PM, Aleksandr Loktionov wrote:
> ice_set_vf_bw() refuses to accept any min_tx_rate value when the
> total guaranteed bandwidth is already oversubscribed, even when the
> requested value is 0. This makes it impossible to recover from an
> oversubscribed state via "ip link set <pf> vf <id> min_tx_rate 0".
> 
> Allow a zero min_tx_rate to bypass the oversubscription check so
> users can always clear the guaranteed rate. Additionally print an
> informational message when the oversubscription guard fires to help
> diagnose why a non-zero request was rejected.
> 
> Fixes: 4ecc8633056b ("ice: Add support for VF rate limiting")
> Cc: stable@vger.kernel.org
> Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
> Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> ---
>   drivers/net/ethernet/intel/ice/ice_sriov.c | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
> index 7e00e09..6e3bec7 100644
> --- a/drivers/net/ethernet/intel/ice/ice_sriov.c
> +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
> @@ -1507,6 +1507,12 @@ ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate)
>   	all_vfs_min_tx_rate -= vf->min_tx_rate;
>   
>   	if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) {
> +		if (ice_calc_all_vfs_min_tx_rate(vf->pf) > link_speed_mbps) {

ice_calc_all_vfs_min_tx_rate() is already called above (out of this 
patch context), can we save that to an interim var and save this second 
call?

> +			dev_info(ice_pf_to_dev(vf->pf),
> +				 "The sum of min_tx_rate for all VFs is greater than the link speed\n");
> +			dev_info(ice_pf_to_dev(vf->pf),
> +				 "Set min_tx_rate to 0 on VFs to resolve oversubscription\n");

Why not 1 string/call?

Thanks,
Tony

> +		}
>   		dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n",
>   			min_tx_rate, vf->vf_id,
>   			all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps,
> @@ -1556,7 +1562,7 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
>   		goto out_put_vf;
>   	}
>   
> -	if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) {
> +	if (min_tx_rate && ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) {
>   		ret = -EINVAL;
>   		goto out_put_vf;
>   	}


^ permalink raw reply

* [PATCH net 0/2] sctp: fix a vtag verification failure caused by stale INITs
From: Xin Long @ 2026-04-10 18:59 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Florian Westphal, Yi Chen

Similar to Scenario B in commit 8e56b063c865 ("netfilter: handle the
connecting collision properly in nf_conntrack_proto_sctp"):

Scenario B: INIT_ACK is delayed until the peer completes its own handshake

  192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408]
    192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885]
    192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408]
    192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO]
    192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK]
  192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] *

There is another case:

Scenario F: INIT is delayed until the peer completes its own handshake

  192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408]
  (OVS upcall)
    192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885]
    192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408]
    192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO]
    192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK]
  192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408]
  (delayed)
  192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] *

In this case, the delayed INIT (e.g. due to OVS upcall) is recorded by
conntrack, which prevents vtag verification from dropping the unexpected
INIT-ACK in nf_conntrack_sctp_packet():

  vtag = ct->proto.sctp.vtag[!dir];
  if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag)
          goto out_unlock;

This happens because ct->proto.sctp.init[!dir] is set by the delayed INIT,
even though it is stale.

Fix this in two parts:

- In netfilter (1st patch): Do not record INITs whose init_tag matches the
  peer vtag, as they carry no new handshake state.

- In SCTP (2nd patch): Prevent endpoints from responding to such INITs with
  INIT-ACK, ensuring correctness even when middleboxes lack the netfilter
  fix.

A follow-up selftest for this scenario will be posted in a separate patch
by Yi Chen.

Xin Long (2):
  netfilter: skip recording stale or retransmitted INIT
  sctp: discard stale INIT after handshake completion

 net/netfilter/nf_conntrack_proto_sctp.c | 10 +++++++---
 net/sctp/sm_statefuns.c                 |  6 ++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

-- 
2.47.1


^ permalink raw reply

* [PATCH net 1/2] netfilter: skip recording stale or retransmitted INIT
From: Xin Long @ 2026-04-10 18:59 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Florian Westphal, Yi Chen
In-Reply-To: <cover.1775847557.git.lucien.xin@gmail.com>

An INIT whose init_tag matches the peer's vtag does not provide new state
information. It indicates either:

- a stale INIT (after INIT-ACK has already been seen on the same side), or
- a retransmitted INIT (after INIT has already been recorded on the same
  side).

In both cases, the INIT must not update ct->proto.sctp.init[] state, since
it does not advance the handshake tracking and may otherwise corrupt
INIT/INIT-ACK validation logic.

Allow INIT processing only when the conntrack entry is newly created
(SCTP_CONNTRACK_NONE), or when the init_tag differs from the stored peer
vtag.

Note that the check is skipped for a ct with old_state SCTP_CONNTRACK_NONE
in nf_conntrack_sctp_packet(), as it has just been created in sctp_new(),
which sets ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag.

Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/netfilter/nf_conntrack_proto_sctp.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 645d2c43ebf7..7e10fa65cbdd 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -466,9 +466,13 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
 			if (!ih)
 				goto out_unlock;
 
-			if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
-				ct->proto.sctp.init[!dir] = 0;
-			ct->proto.sctp.init[dir] = 1;
+			/* Do not record INIT matching peer vtag (stale or retransmitted INIT). */
+			if (old_state == SCTP_CONNTRACK_NONE ||
+			    ct->proto.sctp.vtag[!dir] != ih->init_tag) {
+				if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
+					ct->proto.sctp.init[!dir] = 0;
+				ct->proto.sctp.init[dir] = 1;
+			}
 
 			pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
 			ct->proto.sctp.vtag[!dir] = ih->init_tag;
-- 
2.47.1


^ permalink raw reply related

* [PATCH net 2/2] sctp: discard stale INIT after handshake completion
From: Xin Long @ 2026-04-10 18:59 UTC (permalink / raw)
  To: network dev, linux-sctp
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Simon Horman,
	Marcelo Ricardo Leitner, Florian Westphal, Yi Chen
In-Reply-To: <cover.1775847557.git.lucien.xin@gmail.com>

After an association reaches ESTABLISHED, the peer’s init_tag is already
known from the handshake. Any subsequent INIT with the same init_tag is
not a valid restart, but a delayed or duplicate INIT.

Drop such INIT chunks in sctp_sf_do_unexpected_init() instead of
processing them as new association attempts.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/sctp/sm_statefuns.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7b823d759141..3bec026ecbc0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1556,6 +1556,12 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
 	/* Tag the variable length parameters.  */
 	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
+	if (asoc->state >= SCTP_STATE_ESTABLISHED) {
+		/* Discard INIT matching peer vtag after handshake completion (stale INIT). */
+		if (chunk->subh.init_hdr->init_tag == asoc->peer.i.init_tag)
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+	}
+
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
 	if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-- 
2.47.1


^ permalink raw reply related

* Re: [PATCH net-next 1/3] psp: add crypt-offset and spi-threshold get/set attributes
From: Akhilesh Samineni @ 2026-04-10 19:34 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, willemb,
	daniel.zahka, netdev, linux-kernel, jayakrishnan.udayavarma,
	ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <willemdebruijn.kernel.1d7f9f774aa55@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 10028 bytes --]

On Wed, Apr 8, 2026 at 3:07 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Akhilesh Samineni wrote:
> > crypt-offset (Crypt Offset)
> > ----------------------------------
> > The crypt-offset attribute specifies the byte offset within a packet
> > from which encryption begins. This is a per-device attribute that
> > allows a portion of the packet header to remain in plaintext while
> > the rest of the payload is encrypted. This is useful in scenarios
> > where intermediate nodes need to inspect or process a fixed-size
> > header before the encrypted payload.
> >
> > The default value is 0, meaning encryption starts from the beginning
> > of the payload following the PSP header.
> >
> > spi-threshold (SPI Threshold)
> > ------------------------------
> > The SPI (Security Parameter Index) is a 32-bit per-device identifier
> > used to distinguish security associations. As SPI values are allocated
> > monotonically, a threshold is needed to trigger timely SPI rotation
> > before the space is exhausted.
> >
> > The spi-threshold attribute allows userspace to configure the value at
> > which an SPI rotation should be initiated. The default is set to
> > PSP_SPI_THRESHOLD_DEFAULT (~90% of 0x7FFFFFFF), providing a comfortable
> > margin to perform rotation without racing to exhaustion.
> >
> > NOTE: A follow-up series will add notification support to alert
> > subscribed users when the configured spi-threshold is reached, enabling
> > timely SPI rotation.
> >
> > Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
> > Reviewed-by: Kiran Kella <kiran.kella@broadcom.com>
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > ---
> >  Documentation/netlink/specs/psp.yaml | 13 +++++++++++++
> >  include/net/psp/types.h              |  7 +++++++
> >  include/uapi/linux/psp.h             |  2 ++
> >  net/psp/psp-nl-gen.c                 |  6 ++++--
> >  net/psp/psp_main.c                   |  3 +++
> >  net/psp/psp_nl.c                     | 27 +++++++++++++++++++++++----
> >  6 files changed, 52 insertions(+), 6 deletions(-)
> >
> > diff --git a/Documentation/netlink/specs/psp.yaml b/Documentation/netlink/specs/psp.yaml
> > index f3a57782d2cf..b22869be91cf 100644
> > --- a/Documentation/netlink/specs/psp.yaml
> > +++ b/Documentation/netlink/specs/psp.yaml
> > @@ -38,6 +38,15 @@ attribute-sets:
> >          type: u32
> >          enum: version
> >          enum-as-flags: true
> > +      -
> > +        name: crypt-offset
> > +        doc: The offset from the end of the PSP header to the start of the encrypted payload.
>
> In 4 octet units?
>

Yes. crypt-offset is in 4 octet units only. I will update the
description accordingly in the next v2 patch.

> > +        type: u8
> > +      -
> > +        name: spi-threshold
> > +        doc: Threshold for the SPI to trigger notification to the user for appropriate rotate action.
> > +        type: u32
> > +
> >    -
> >      name: assoc
> >      attributes:
> > @@ -170,6 +179,8 @@ operations:
> >              - ifindex
> >              - psp-versions-cap
> >              - psp-versions-ena
> > +            - crypt-offset
> > +            - spi-threshold
> >          pre: psp-device-get-locked
> >          post: psp-device-unlock
> >        dump:
> > @@ -193,6 +204,8 @@ operations:
> >            attributes:
> >              - id
> >              - psp-versions-ena
> > +            - crypt-offset
> > +            - spi-threshold
> >          reply:
> >            attributes: []
> >          pre: psp-device-get-locked
> > diff --git a/include/net/psp/types.h b/include/net/psp/types.h
> > index 25a9096d4e7d..875f7822557f 100644
> > --- a/include/net/psp/types.h
> > +++ b/include/net/psp/types.h
> > @@ -25,6 +25,9 @@ struct psphdr {
> >  #define PSP_SPI_KEY_ID               GENMASK(30, 0)
> >  #define PSP_SPI_KEY_PHASE    BIT(31)
> >
> > +/* Default SPI threshold: ~90% of max SPI (0x7FFFFFFF) to allow rotation before exhaustion */
> > +#define PSP_SPI_THRESHOLD_DEFAULT    0x73333333
>
> Do you want to choose a more round number, in either hex or dec?
>

I think we can use 0x70000000; it's approximately 87.5% of the maximum SPI.

> > +
> >  #define PSPHDR_CRYPT_OFFSET  GENMASK(5, 0)
> >
> >  #define PSPHDR_VERFL_SAMPLE  BIT(7)
> > @@ -38,9 +41,13 @@ struct psphdr {
> >  /**
> >   * struct psp_dev_config - PSP device configuration
> >   * @versions: PSP versions enabled on the device
> > + * @crypt_offset: crypto offset configured on the device
> > + * @spi_threshold: SPI threshold value on the device
> >   */
> >  struct psp_dev_config {
> >       u32 versions;
> > +     u8 crypt_offset;
> > +     u32 spi_threshold;
> >  };
> >
> >  /**
> > diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h
> > index a3a336488dc3..bb390159dc72 100644
> > --- a/include/uapi/linux/psp.h
> > +++ b/include/uapi/linux/psp.h
> > @@ -22,6 +22,8 @@ enum {
> >       PSP_A_DEV_IFINDEX,
> >       PSP_A_DEV_PSP_VERSIONS_CAP,
> >       PSP_A_DEV_PSP_VERSIONS_ENA,
> > +     PSP_A_DEV_CRYPT_OFFSET,
> > +     PSP_A_DEV_SPI_THRESHOLD,
> >
> >       __PSP_A_DEV_MAX,
> >       PSP_A_DEV_MAX = (__PSP_A_DEV_MAX - 1)
> > diff --git a/net/psp/psp-nl-gen.c b/net/psp/psp-nl-gen.c
> > index 22a48d0fa378..e50b8b80955c 100644
> > --- a/net/psp/psp-nl-gen.c
> > +++ b/net/psp/psp-nl-gen.c
> > @@ -23,9 +23,11 @@ static const struct nla_policy psp_dev_get_nl_policy[PSP_A_DEV_ID + 1] = {
> >  };
> >
> >  /* PSP_CMD_DEV_SET - do */
> > -static const struct nla_policy psp_dev_set_nl_policy[PSP_A_DEV_PSP_VERSIONS_ENA + 1] = {
> > +static const struct nla_policy psp_dev_set_nl_policy[PSP_A_DEV_SPI_THRESHOLD + 1] = {
> >       [PSP_A_DEV_ID] = NLA_POLICY_MIN(NLA_U32, 1),
> >       [PSP_A_DEV_PSP_VERSIONS_ENA] = NLA_POLICY_MASK(NLA_U32, 0xf),
> > +     [PSP_A_DEV_CRYPT_OFFSET] = { .type = NLA_U8, },
> > +     [PSP_A_DEV_SPI_THRESHOLD] = { .type = NLA_U32, },
> >  };
> >
> >  /* PSP_CMD_KEY_ROTATE - do */
> > @@ -75,7 +77,7 @@ static const struct genl_split_ops psp_nl_ops[] = {
> >               .doit           = psp_nl_dev_set_doit,
> >               .post_doit      = psp_device_unlock,
> >               .policy         = psp_dev_set_nl_policy,
> > -             .maxattr        = PSP_A_DEV_PSP_VERSIONS_ENA,
> > +             .maxattr        = PSP_A_DEV_SPI_THRESHOLD,
> >               .flags          = GENL_CMD_CAP_DO,
> >       },
> >       {
> > diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c
> > index 9508b6c38003..536ee44db09d 100644
> > --- a/net/psp/psp_main.c
> > +++ b/net/psp/psp_main.c
> > @@ -79,6 +79,9 @@ psp_dev_create(struct net_device *netdev,
> >       INIT_LIST_HEAD(&psd->stale_assocs);
> >       refcount_set(&psd->refcnt, 1);
> >
> > +     /* ~90% of 0x7FFFFFFF; allows SPI rotation well before space is exhausted */
>
> Repeat comment. Not needed here.
>

Ack

> > +     psd->config.spi_threshold = PSP_SPI_THRESHOLD_DEFAULT;
> > +
> >       mutex_lock(&psp_devs_lock);
> >       err = xa_alloc_cyclic(&psp_devs, &psd->id, psd, xa_limit_16b,
> >                             &last_id, GFP_KERNEL);
> > diff --git a/net/psp/psp_nl.c b/net/psp/psp_nl.c
> > index 6afd7707ec12..fbb77460a24b 100644
> > --- a/net/psp/psp_nl.c
> > +++ b/net/psp/psp_nl.c
> > @@ -101,7 +101,9 @@ psp_nl_dev_fill(struct psp_dev *psd, struct sk_buff *rsp,
> >       if (nla_put_u32(rsp, PSP_A_DEV_ID, psd->id) ||
> >           nla_put_u32(rsp, PSP_A_DEV_IFINDEX, psd->main_netdev->ifindex) ||
> >           nla_put_u32(rsp, PSP_A_DEV_PSP_VERSIONS_CAP, psd->caps->versions) ||
> > -         nla_put_u32(rsp, PSP_A_DEV_PSP_VERSIONS_ENA, psd->config.versions))
> > +         nla_put_u32(rsp, PSP_A_DEV_PSP_VERSIONS_ENA, psd->config.versions) ||
> > +         nla_put_u8(rsp, PSP_A_DEV_CRYPT_OFFSET, psd->config.crypt_offset) ||
> > +         nla_put_u32(rsp, PSP_A_DEV_SPI_THRESHOLD, psd->config.spi_threshold))
> >               goto err_cancel_msg;
> >
> >       genlmsg_end(rsp, hdr);
> > @@ -193,6 +195,13 @@ int psp_nl_dev_set_doit(struct sk_buff *skb, struct genl_info *info)
> >
> >       memcpy(&new_config, &psd->config, sizeof(new_config));
> >
> > +     if (!info->attrs[PSP_A_DEV_PSP_VERSIONS_ENA] &&
> > +         !info->attrs[PSP_A_DEV_CRYPT_OFFSET] &&
> > +         !info->attrs[PSP_A_DEV_SPI_THRESHOLD]) {
> > +             NL_SET_ERR_MSG(info->extack, "No settings present");
> > +             return -EINVAL;
> > +     }
> > +
> >       if (info->attrs[PSP_A_DEV_PSP_VERSIONS_ENA]) {
> >               new_config.versions =
> >                       nla_get_u32(info->attrs[PSP_A_DEV_PSP_VERSIONS_ENA]);
> > @@ -200,9 +209,19 @@ int psp_nl_dev_set_doit(struct sk_buff *skb, struct genl_info *info)
> >                       NL_SET_ERR_MSG(info->extack, "Requested PSP versions not supported by the device");
> >                       return -EINVAL;
> >               }
> > -     } else {
> > -             NL_SET_ERR_MSG(info->extack, "No settings present");
> > -             return -EINVAL;
> > +     }
> > +
> > +     if (info->attrs[PSP_A_DEV_CRYPT_OFFSET])
> > +             new_config.crypt_offset =
> > +                     nla_get_u8(info->attrs[PSP_A_DEV_CRYPT_OFFSET]);
>
> PSP defines a 6-bit field in 4 octet units. Does this need bounds checking?
>

Yes, I will add the bounds checks in the next v2 patch.
> > +
> > +     if (info->attrs[PSP_A_DEV_SPI_THRESHOLD]) {
> > +             new_config.spi_threshold =
> > +                     nla_get_u32(info->attrs[PSP_A_DEV_SPI_THRESHOLD]);
> > +             if (new_config.spi_threshold & PSP_SPI_KEY_PHASE) {
> > +                     NL_SET_ERR_MSG(info->extack, "SPI threshold must not have bit 31 set");
> > +                     return -EINVAL;
> > +             }
> >       }
> >
> >       rsp = psp_nl_reply_new(info);
> > --
> > 2.45.4
> >
>
>

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 1/3] psp: add crypt-offset and spi-threshold get/set attributes
From: Akhilesh Samineni @ 2026-04-10 19:36 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Willem de Bruijn, davem, edumazet, pabeni, andrew+netdev, horms,
	willemb, daniel.zahka, netdev, linux-kernel,
	jayakrishnan.udayavarma, ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <20260407180432.102073cf@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 596 bytes --]

On Wed, Apr 8, 2026 at 6:34 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue, 07 Apr 2026 17:37:41 -0400 Willem de Bruijn wrote:
> > > +   if (info->attrs[PSP_A_DEV_CRYPT_OFFSET])
> > > +           new_config.crypt_offset =
> > > +                   nla_get_u8(info->attrs[PSP_A_DEV_CRYPT_OFFSET]);
> >
> > PSP defines a 6-bit field in 4 octet units. Does this need bounds checking?
>
> More fundamentally, were we to support this -- is it a device property
> or an assoc property?

It's a device property. All associations under the device will share
the same crypt-offset.

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 2/3] netdevsim: psp: handle the new crypt-offset and spi-threshold get/set operations
From: Akhilesh Samineni @ 2026-04-10 19:45 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, willemb,
	daniel.zahka, netdev, linux-kernel, jayakrishnan.udayavarma,
	ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <willemdebruijn.kernel.2484afecaca4d@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2059 bytes --]

On Wed, Apr 8, 2026 at 3:13 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Akhilesh Samineni wrote:
> > Implement the crypt-offset and spi-threshold get/set in netdevsim PSP.
> >
> > Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
> > Reviewed-by: Kiran Kella <kiran.kella@broadcom.com>
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > ---
> >  drivers/net/netdevsim/netdevsim.h | 2 ++
> >  drivers/net/netdevsim/psp.c       | 6 ++++++
> >  2 files changed, 8 insertions(+)
> >
> > diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
> > index c904e14f6b3f..3ad7d42391c0 100644
> > --- a/drivers/net/netdevsim/netdevsim.h
> > +++ b/drivers/net/netdevsim/netdevsim.h
> > @@ -117,6 +117,8 @@ struct netdevsim {
> >               struct psp_dev *dev;
> >               u32 spi;
> >               u32 assoc_cnt;
> > +             u8  crypt_offset;
>
> Minor: variable names are already not aligned. No need for two spaces.
>

Ack
> > +             u32 spi_threshold;
> >       } psp;
> >
> >       struct nsim_bus_dev *nsim_bus_dev;
> > diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c
> > index 0b4d717253b0..9098edf00c5c 100644
> > --- a/drivers/net/netdevsim/psp.c
> > +++ b/drivers/net/netdevsim/psp.c
> > @@ -122,6 +122,11 @@ static int
> >  nsim_psp_set_config(struct psp_dev *psd, struct psp_dev_config *conf,
> >                   struct netlink_ext_ack *extack)
> >  {
> > +     struct netdevsim *ns = psd->drv_priv;
> > +
> > +     ns->psp.crypt_offset = conf->crypt_offset;
> > +     ns->psp.spi_threshold = conf->spi_threshold;
> > +
> >       return 0;
> >  }
> >
> > @@ -249,6 +254,7 @@ int nsim_psp_init(struct netdevsim *ns)
> >       if (err)
> >               return err;
> >
> > +     ns->psp.spi_threshold = PSP_SPI_THRESHOLD_DEFAULT;
> >       debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops);
> >       return 0;
> >  }
> > --
> > 2.45.4
> >
>
>

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 2/3] netdevsim: psp: handle the new crypt-offset and spi-threshold get/set operations
From: Akhilesh Samineni @ 2026-04-10 19:48 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, edumazet, kuba, pabeni, andrew+netdev, horms, willemb,
	daniel.zahka, netdev, linux-kernel, jayakrishnan.udayavarma,
	ajit.khaparde, kiran.kella, sachin.suman
In-Reply-To: <willemdebruijn.kernel.327df0cb46f23@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2096 bytes --]

On Wed, Apr 8, 2026 at 3:19 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Akhilesh Samineni wrote:
> > Implement the crypt-offset and spi-threshold get/set in netdevsim PSP.
> >
> > Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
> > Reviewed-by: Kiran Kella <kiran.kella@broadcom.com>
> > Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
> > ---
> >  drivers/net/netdevsim/netdevsim.h | 2 ++
> >  drivers/net/netdevsim/psp.c       | 6 ++++++
> >  2 files changed, 8 insertions(+)
> >
> > diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
> > index c904e14f6b3f..3ad7d42391c0 100644
> > --- a/drivers/net/netdevsim/netdevsim.h
> > +++ b/drivers/net/netdevsim/netdevsim.h
> > @@ -117,6 +117,8 @@ struct netdevsim {
> >               struct psp_dev *dev;
> >               u32 spi;
> >               u32 assoc_cnt;
> > +             u8  crypt_offset;
> > +             u32 spi_threshold;
> >       } psp;
> >
> >       struct nsim_bus_dev *nsim_bus_dev;
> > diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c
> > index 0b4d717253b0..9098edf00c5c 100644
> > --- a/drivers/net/netdevsim/psp.c
> > +++ b/drivers/net/netdevsim/psp.c
> > @@ -122,6 +122,11 @@ static int
> >  nsim_psp_set_config(struct psp_dev *psd, struct psp_dev_config *conf,
> >                   struct netlink_ext_ack *extack)
> >  {
> > +     struct netdevsim *ns = psd->drv_priv;
> > +
> > +     ns->psp.crypt_offset = conf->crypt_offset;
> > +     ns->psp.spi_threshold = conf->spi_threshold;
> > +
> >       return 0;
> >  }
> >
> > @@ -249,6 +254,7 @@ int nsim_psp_init(struct netdevsim *ns)
> >       if (err)
> >               return err;
> >
> > +     ns->psp.spi_threshold = PSP_SPI_THRESHOLD_DEFAULT;
> >       debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops);
> >       return 0;
>
> Default initialization should probably all complete before the device
> is made visible with psp_dev_create.

Yes. I will update it in the next v2 patch.

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4211 bytes --]

^ permalink raw reply

* [PATCH net-next v2 00/14] net: macb: implement context swapping
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun

MACB has a pretty primitive approach to buffer management. They are all
stored in `struct macb *bp`. On operations that require buffer realloc
(set_ringparam & change_mtu ATM), the only option is to close the
interface, change our global state and re-open the interface.

Two issues:
- It doesn't fly on memory-pressured systems; we free our precious
  buffers and don't manage to reallocate fully, meaning our machine
  has just lost its network access.
- Anecdotally, it is pretty slow because it implies a full PHY reinit.

Instead, we shall:
 - allocate a new context (including buffers) first
 - if it fails, early return without any impact to the interface
 - stop interface
 - update global state (bp, netdev, etc)
 - pass newly allocated buffer pointers to the hardware
 - start interface
 - free old context

This is what we implement here. Both .set_ringparam() and
.ndo_change_mtu() are covered by this series. In the future,
at least .set_channels() [0], XDP [1] and XSK [2] would benefit.

The change is super intrusive so conflicts will be major. Sorry!

Thanks,
Have a nice day,
Théo

[0]: https://lore.kernel.org/netdev/20260317-macb-set-channels-v4-0-1bd4f4ffcfca@bootlin.com/
[1]: https://lore.kernel.org/netdev/20260323221047.2749577-1-pvalerio@redhat.com/
[2]: https://lore.kernel.org/netdev/20260304-macb-xsk-v1-0-ba2ebe2bdaa3@bootlin.com/

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
Changes in v2:
- Patch "add subset of `struct macb` to `struct macb_context`" was
  messed up. It contained much more than what the name implied. Split
  into three commits (I caused trouble by rebase reordering).
- Fix tieoff; V1 allocated it without initialisation.
- Fix NULL pointer dereference on context in macb_get_regs() and
  macb_get_ringparam() when interface is offline.
- Patch "unify device pointer naming convention":
  - Fix build issue when CONFIG_NETCONSOLE=y.
  - Rename `struct net_device *dev` to `netdev` in macb.h.
  - Rename `struct phy_device *phy` to `phydev` in macb_main.c.
- On swap, call netdev_tx_reset_queue() to reset all DQL counters.
- At end of swap, add missing kfree(old_ctx).
- During HW disabling in swap, grab bp->lock to protect against IRQ
  handler.
- On swap, cancel the three BH features MACB has:
  bp->hresp_err_bh_work, bp->tx_lpi_work and queue->tx_error_task.
- On swap, call macb_configure_dma() which writes buffer size to
  hardware registers. This is important because the change_mtu codepath
  changes the buffer size.
- Rebase onto latest net-next/main (58dd34dbd5b0) & resolve conflicts.
- Link to v1: https://patch.msgid.link/20260401-macb-context-v1-0-9590c5ab7272@bootlin.com

---
Théo Lebrun (14):
      net: macb: unify device pointer naming convention
      net: macb: unify `struct macb *` naming convention
      net: macb: unify queue index variable naming convention and types
      net: macb: enforce reverse christmas tree (RCT) convention
      net: macb: allocate tieoff descriptor once across device lifetime
      net: macb: introduce macb_context struct for buffer management
      net: macb: avoid macb_init_rx_buffer_size() modifying state
      net: macb: make `struct macb` subset reachable from macb_context struct
      net: macb: change caps helpers signatures
      net: macb: change function signatures to take contexts
      net: macb: introduce macb_context_alloc() helper
      net: macb: re-read ISR inside IRQ handler locked section
      net: macb: use context swapping in .set_ringparam()
      net: macb: use context swapping in .ndo_change_mtu()

 drivers/net/ethernet/cadence/macb.h      |  125 ++-
 drivers/net/ethernet/cadence/macb_main.c | 1767 +++++++++++++++++-------------
 drivers/net/ethernet/cadence/macb_pci.c  |   46 +-
 drivers/net/ethernet/cadence/macb_ptp.c  |   26 +-
 4 files changed, 1126 insertions(+), 838 deletions(-)
---
base-commit: 6b6916526425235d5875df21dfa6f31fdc098599
change-id: 20260401-macb-context-bd0caf20414d

Best regards,
--  
Théo Lebrun <theo.lebrun@bootlin.com>


^ permalink raw reply

* [PATCH net-next v2 01/14] net: macb: unify device pointer naming convention
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Here are all device pointer variable permutations inside MACB:

   struct device *dev;
   struct net_device *dev;
   struct net_device *ndev;
   struct net_device *netdev;
   struct pci_dev *pdev;              // inside macb_pci.c
   struct platform_device *pdev;
   struct platform_device *plat_dev;  // inside macb_pci.c

Unify to this convention:

   struct device *dev;
   struct net_device *netdev;
   struct pci_dev *pci;
   struct platform_device *pdev;

Ensure nothing slipped through using ctags tooling:

⟩ ctags -o - --kinds-c='{local}{member}{parameter}' \
    --fields='{typeref}' drivers/net/ethernet/cadence/* | \
  awk -F"\t" '
    $NF~/struct:.*(device|dev) / {print $NF, $1}' | \
  sort -u
typeref:struct:device * dev
typeref:struct:in_device * idev        // ignored
typeref:struct:net_device * netdev
typeref:struct:pci_dev * pci
typeref:struct:phy_device * phy        // ignored
typeref:struct:phy_device * phydev     // ignored
typeref:struct:platform_device * pdev

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |  20 +-
 drivers/net/ethernet/cadence/macb_main.c | 632 ++++++++++++++++---------------
 drivers/net/ethernet/cadence/macb_pci.c  |  46 +--
 drivers/net/ethernet/cadence/macb_ptp.c  |  18 +-
 4 files changed, 359 insertions(+), 357 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 2de56017ee0d..9857df5b57f0 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1207,11 +1207,11 @@ struct macb_or_gem_ops {
 
 /* MACB-PTP interface: adapt to platform needs. */
 struct macb_ptp_info {
-	void (*ptp_init)(struct net_device *ndev);
-	void (*ptp_remove)(struct net_device *ndev);
+	void (*ptp_init)(struct net_device *netdev);
+	void (*ptp_remove)(struct net_device *netdev);
 	s32 (*get_ptp_max_adj)(void);
 	unsigned int (*get_tsu_rate)(struct macb *bp);
-	int (*get_ts_info)(struct net_device *dev,
+	int (*get_ts_info)(struct net_device *netdev,
 			   struct kernel_ethtool_ts_info *info);
 	int (*get_hwtst)(struct net_device *netdev,
 			 struct kernel_hwtstamp_config *tstamp_config);
@@ -1326,7 +1326,7 @@ struct macb {
 	struct clk		*tx_clk;
 	struct clk		*rx_clk;
 	struct clk		*tsu_clk;
-	struct net_device	*dev;
+	struct net_device	*netdev;
 	/* Protects hw_stats and ethtool_stats */
 	spinlock_t		stats_lock;
 	union {
@@ -1406,8 +1406,8 @@ enum macb_bd_control {
 	TSTAMP_ALL_FRAMES,
 };
 
-void gem_ptp_init(struct net_device *ndev);
-void gem_ptp_remove(struct net_device *ndev);
+void gem_ptp_init(struct net_device *netdev);
+void gem_ptp_remove(struct net_device *netdev);
 void gem_ptp_txstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc);
 void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc);
 static inline void gem_ptp_do_txstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc)
@@ -1426,14 +1426,14 @@ static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, stru
 	gem_ptp_rxstamp(bp, skb, desc);
 }
 
-int gem_get_hwtst(struct net_device *dev,
+int gem_get_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config);
-int gem_set_hwtst(struct net_device *dev,
+int gem_set_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config,
 		  struct netlink_ext_ack *extack);
 #else
-static inline void gem_ptp_init(struct net_device *ndev) { }
-static inline void gem_ptp_remove(struct net_device *ndev) { }
+static inline void gem_ptp_init(struct net_device *netdev) { }
+static inline void gem_ptp_remove(struct net_device *netdev) { }
 
 static inline void gem_ptp_do_txstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
 static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index d9716c56f705..896d481e0f95 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -252,9 +252,9 @@ static void macb_set_hwaddr(struct macb *bp)
 	u32 bottom;
 	u16 top;
 
-	bottom = get_unaligned_le32(bp->dev->dev_addr);
+	bottom = get_unaligned_le32(bp->netdev->dev_addr);
 	macb_or_gem_writel(bp, SA1B, bottom);
-	top = get_unaligned_le16(bp->dev->dev_addr + 4);
+	top = get_unaligned_le16(bp->netdev->dev_addr + 4);
 	macb_or_gem_writel(bp, SA1T, top);
 
 	if (gem_has_ptp(bp)) {
@@ -291,13 +291,13 @@ static void macb_get_hwaddr(struct macb *bp)
 		addr[5] = (top >> 8) & 0xff;
 
 		if (is_valid_ether_addr(addr)) {
-			eth_hw_addr_set(bp->dev, addr);
+			eth_hw_addr_set(bp->netdev, addr);
 			return;
 		}
 	}
 
 	dev_info(&bp->pdev->dev, "invalid hw address, using random\n");
-	eth_hw_addr_random(bp->dev);
+	eth_hw_addr_random(bp->netdev);
 }
 
 static int macb_mdio_wait_for_idle(struct macb *bp)
@@ -509,12 +509,12 @@ static void macb_set_tx_clk(struct macb *bp, int speed)
 	ferr = abs(rate_rounded - rate);
 	ferr = DIV_ROUND_UP(ferr, rate / 100000);
 	if (ferr > 5)
-		netdev_warn(bp->dev,
+		netdev_warn(bp->netdev,
 			    "unable to generate target frequency: %ld Hz\n",
 			    rate);
 
 	if (clk_set_rate(bp->tx_clk, rate_rounded))
-		netdev_err(bp->dev, "adjusting tx_clk failed.\n");
+		netdev_err(bp->netdev, "adjusting tx_clk failed.\n");
 }
 
 static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
@@ -697,8 +697,8 @@ static void macb_tx_lpi_wake(struct macb *bp)
 
 static void macb_mac_disable_tx_lpi(struct phylink_config *config)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 
 	cancel_delayed_work_sync(&bp->tx_lpi_work);
@@ -712,8 +712,8 @@ static void macb_mac_disable_tx_lpi(struct phylink_config *config)
 static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
 				  bool tx_clk_stop)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 
 	spin_lock_irqsave(&bp->lock, flags);
@@ -732,8 +732,8 @@ static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
 static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 			    const struct phylink_link_state *state)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned long flags;
 	u32 old_ctrl, ctrl;
 	u32 old_ncr, ncr;
@@ -774,8 +774,8 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
 			       phy_interface_t interface)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int q;
 	u32 ctrl;
@@ -789,7 +789,7 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode,
 	ctrl = macb_readl(bp, NCR) & ~(MACB_BIT(RE) | MACB_BIT(TE));
 	macb_writel(bp, NCR, ctrl);
 
-	netif_tx_stop_all_queues(ndev);
+	netif_tx_stop_all_queues(netdev);
 }
 
 /* Use juggling algorithm to left rotate tx ring and tx skb array */
@@ -884,13 +884,13 @@ static void gem_shuffle_tx_rings(struct macb *bp)
 }
 
 static void macb_mac_link_up(struct phylink_config *config,
-			     struct phy_device *phy,
+			     struct phy_device *phydev,
 			     unsigned int mode, phy_interface_t interface,
 			     int speed, int duplex,
 			     bool tx_pause, bool rx_pause)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
@@ -946,14 +946,14 @@ static void macb_mac_link_up(struct phylink_config *config,
 
 	macb_writel(bp, NCR, ctrl | MACB_BIT(RE) | MACB_BIT(TE));
 
-	netif_tx_wake_all_queues(ndev);
+	netif_tx_wake_all_queues(netdev);
 }
 
 static struct phylink_pcs *macb_mac_select_pcs(struct phylink_config *config,
 					       phy_interface_t interface)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	if (interface == PHY_INTERFACE_MODE_10GBASER)
 		return &bp->phylink_usx_pcs;
@@ -982,7 +982,7 @@ static bool macb_phy_handle_exists(struct device_node *dn)
 static int macb_phylink_connect(struct macb *bp)
 {
 	struct device_node *dn = bp->pdev->dev.of_node;
-	struct net_device *dev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	struct phy_device *phydev;
 	int ret;
 
@@ -992,7 +992,7 @@ static int macb_phylink_connect(struct macb *bp)
 	if (!dn || (ret && !macb_phy_handle_exists(dn))) {
 		phydev = phy_find_first(bp->mii_bus);
 		if (!phydev) {
-			netdev_err(dev, "no PHY found\n");
+			netdev_err(netdev, "no PHY found\n");
 			return -ENXIO;
 		}
 
@@ -1001,7 +1001,7 @@ static int macb_phylink_connect(struct macb *bp)
 	}
 
 	if (ret) {
-		netdev_err(dev, "Could not attach PHY (%d)\n", ret);
+		netdev_err(netdev, "Could not attach PHY (%d)\n", ret);
 		return ret;
 	}
 
@@ -1013,21 +1013,21 @@ static int macb_phylink_connect(struct macb *bp)
 static void macb_get_pcs_fixed_state(struct phylink_config *config,
 				     struct phylink_link_state *state)
 {
-	struct net_device *ndev = to_net_dev(config->dev);
-	struct macb *bp = netdev_priv(ndev);
+	struct net_device *netdev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	state->link = (macb_readl(bp, NSR) & MACB_BIT(NSR_LINK)) != 0;
 }
 
 /* based on au1000_eth. c*/
-static int macb_mii_probe(struct net_device *dev)
+static int macb_mii_probe(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	bp->phylink_sgmii_pcs.ops = &macb_phylink_pcs_ops;
 	bp->phylink_usx_pcs.ops = &macb_phylink_usx_pcs_ops;
 
-	bp->phylink_config.dev = &dev->dev;
+	bp->phylink_config.dev = &netdev->dev;
 	bp->phylink_config.type = PHYLINK_NETDEV;
 	bp->phylink_config.mac_managed_pm = true;
 
@@ -1086,7 +1086,7 @@ static int macb_mii_probe(struct net_device *dev)
 	bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode,
 				     bp->phy_interface, &macb_phylink_ops);
 	if (IS_ERR(bp->phylink)) {
-		netdev_err(dev, "Could not create a phylink instance (%ld)\n",
+		netdev_err(netdev, "Could not create a phylink instance (%ld)\n",
 			   PTR_ERR(bp->phylink));
 		return PTR_ERR(bp->phylink);
 	}
@@ -1133,7 +1133,7 @@ static int macb_mii_init(struct macb *bp)
 	 */
 	mdio_np = of_get_child_by_name(np, "mdio");
 	if (!mdio_np && of_phy_is_fixed_link(np))
-		return macb_mii_probe(bp->dev);
+		return macb_mii_probe(bp->netdev);
 
 	/* Enable management port */
 	macb_writel(bp, NCR, MACB_BIT(MPE));
@@ -1154,13 +1154,13 @@ static int macb_mii_init(struct macb *bp)
 	bp->mii_bus->priv = bp;
 	bp->mii_bus->parent = &bp->pdev->dev;
 
-	dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
+	dev_set_drvdata(&bp->netdev->dev, bp->mii_bus);
 
 	err = macb_mdiobus_register(bp, mdio_np);
 	if (err)
 		goto err_out_free_mdiobus;
 
-	err = macb_mii_probe(bp->dev);
+	err = macb_mii_probe(bp->netdev);
 	if (err)
 		goto err_out_unregister_bus;
 
@@ -1268,7 +1268,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	unsigned long		flags;
 
 	queue_index = queue - bp->queues;
-	netdev_vdbg(bp->dev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
+	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
 		    queue_index, queue->tx_tail, queue->tx_head);
 
 	/* Prevent the queue NAPI TX poll from running, as it calls
@@ -1281,14 +1281,14 @@ static void macb_tx_error_task(struct work_struct *work)
 	spin_lock_irqsave(&bp->lock, flags);
 
 	/* Make sure nobody is trying to queue up new packets */
-	netif_tx_stop_all_queues(bp->dev);
+	netif_tx_stop_all_queues(bp->netdev);
 
 	/* Stop transmission now
 	 * (in case we have just queued new packets)
 	 * macb/gem must be halted to write TBQP register
 	 */
 	if (macb_halt_tx(bp)) {
-		netdev_err(bp->dev, "BUG: halt tx timed out\n");
+		netdev_err(bp->netdev, "BUG: halt tx timed out\n");
 		macb_writel(bp, NCR, macb_readl(bp, NCR) & (~MACB_BIT(TE)));
 		halt_timeout = true;
 	}
@@ -1317,13 +1317,13 @@ static void macb_tx_error_task(struct work_struct *work)
 			 * since it's the only one written back by the hardware
 			 */
 			if (!(ctrl & MACB_BIT(TX_BUF_EXHAUSTED))) {
-				netdev_vdbg(bp->dev, "txerr skb %u (data %p) TX complete\n",
+				netdev_vdbg(bp->netdev, "txerr skb %u (data %p) TX complete\n",
 					    macb_tx_ring_wrap(bp, tail),
 					    skb->data);
-				bp->dev->stats.tx_packets++;
+				bp->netdev->stats.tx_packets++;
 				queue->stats.tx_packets++;
 				packets++;
-				bp->dev->stats.tx_bytes += skb->len;
+				bp->netdev->stats.tx_bytes += skb->len;
 				queue->stats.tx_bytes += skb->len;
 				bytes += skb->len;
 			}
@@ -1333,7 +1333,7 @@ static void macb_tx_error_task(struct work_struct *work)
 			 * those. Statistics are updated by hardware.
 			 */
 			if (ctrl & MACB_BIT(TX_BUF_EXHAUSTED))
-				netdev_err(bp->dev,
+				netdev_err(bp->netdev,
 					   "BUG: TX buffers exhausted mid-frame\n");
 
 			desc->ctrl = ctrl | MACB_BIT(TX_USED);
@@ -1342,7 +1342,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		macb_tx_unmap(bp, tx_skb, 0);
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->dev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
 				  packets, bytes);
 
 	/* Set end of TX queue */
@@ -1367,7 +1367,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TE));
 
 	/* Now we are ready to start transmission again */
-	netif_tx_start_all_queues(bp->dev);
+	netif_tx_start_all_queues(bp->netdev);
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 
 	spin_unlock_irqrestore(&bp->lock, flags);
@@ -1446,12 +1446,12 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 				    !ptp_one_step_sync(skb))
 					gem_ptp_do_txstamp(bp, skb, desc);
 
-				netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n",
+				netdev_vdbg(bp->netdev, "skb %u (data %p) TX complete\n",
 					    macb_tx_ring_wrap(bp, tail),
 					    skb->data);
-				bp->dev->stats.tx_packets++;
+				bp->netdev->stats.tx_packets++;
 				queue->stats.tx_packets++;
-				bp->dev->stats.tx_bytes += skb->len;
+				bp->netdev->stats.tx_bytes += skb->len;
 				queue->stats.tx_bytes += skb->len;
 				packets++;
 				bytes += skb->len;
@@ -1469,14 +1469,14 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 		}
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->dev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
 				  packets, bytes);
 
 	queue->tx_tail = tail;
-	if (__netif_subqueue_stopped(bp->dev, queue_index) &&
+	if (__netif_subqueue_stopped(bp->netdev, queue_index) &&
 	    CIRC_CNT(queue->tx_head, queue->tx_tail,
 		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
-		netif_wake_subqueue(bp->dev, queue_index);
+		netif_wake_subqueue(bp->netdev, queue_index);
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
 
 	if (packets)
@@ -1504,9 +1504,9 @@ static void gem_rx_refill(struct macb_queue *queue)
 
 		if (!queue->rx_skbuff[entry]) {
 			/* allocate sk_buff for this free entry in ring */
-			skb = netdev_alloc_skb(bp->dev, bp->rx_buffer_size);
+			skb = netdev_alloc_skb(bp->netdev, bp->rx_buffer_size);
 			if (unlikely(!skb)) {
-				netdev_err(bp->dev,
+				netdev_err(bp->netdev,
 					   "Unable to allocate sk_buff\n");
 				break;
 			}
@@ -1555,8 +1555,8 @@ static void gem_rx_refill(struct macb_queue *queue)
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
-	netdev_vdbg(bp->dev, "rx ring: queue: %p, prepared head %d, tail %d\n",
-			queue, queue->rx_prepared_head, queue->rx_tail);
+	netdev_vdbg(bp->netdev, "rx ring: queue: %p, prepared head %d, tail %d\n",
+		    queue, queue->rx_prepared_head, queue->rx_tail);
 }
 
 /* Mark DMA descriptors from begin up to and not including end as unused */
@@ -1616,17 +1616,17 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		count++;
 
 		if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) {
-			netdev_err(bp->dev,
+			netdev_err(bp->netdev,
 				   "not whole frame pointed by descriptor\n");
-			bp->dev->stats.rx_dropped++;
+			bp->netdev->stats.rx_dropped++;
 			queue->stats.rx_dropped++;
 			break;
 		}
 		skb = queue->rx_skbuff[entry];
 		if (unlikely(!skb)) {
-			netdev_err(bp->dev,
+			netdev_err(bp->netdev,
 				   "inconsistent Rx descriptor chain\n");
-			bp->dev->stats.rx_dropped++;
+			bp->netdev->stats.rx_dropped++;
 			queue->stats.rx_dropped++;
 			break;
 		}
@@ -1634,28 +1634,28 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		queue->rx_skbuff[entry] = NULL;
 		len = ctrl & bp->rx_frm_len_mask;
 
-		netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len);
+		netdev_vdbg(bp->netdev, "gem_rx %u (len %u)\n", entry, len);
 
 		skb_put(skb, len);
 		dma_unmap_single(&bp->pdev->dev, addr,
 				 bp->rx_buffer_size, DMA_FROM_DEVICE);
 
-		skb->protocol = eth_type_trans(skb, bp->dev);
+		skb->protocol = eth_type_trans(skb, bp->netdev);
 		skb_checksum_none_assert(skb);
-		if (bp->dev->features & NETIF_F_RXCSUM &&
-		    !(bp->dev->flags & IFF_PROMISC) &&
+		if (bp->netdev->features & NETIF_F_RXCSUM &&
+		    !(bp->netdev->flags & IFF_PROMISC) &&
 		    GEM_BFEXT(RX_CSUM, ctrl) & GEM_RX_CSUM_CHECKED_MASK)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-		bp->dev->stats.rx_packets++;
+		bp->netdev->stats.rx_packets++;
 		queue->stats.rx_packets++;
-		bp->dev->stats.rx_bytes += skb->len;
+		bp->netdev->stats.rx_bytes += skb->len;
 		queue->stats.rx_bytes += skb->len;
 
 		gem_ptp_do_rxstamp(bp, skb, desc);
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
-		netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
+		netdev_vdbg(bp->netdev, "received skb of length %u, csum: %08x\n",
 			    skb->len, skb->csum);
 		print_hex_dump(KERN_DEBUG, " mac: ", DUMP_PREFIX_ADDRESS, 16, 1,
 			       skb_mac_header(skb), 16, true);
@@ -1684,9 +1684,9 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	desc = macb_rx_desc(queue, last_frag);
 	len = desc->ctrl & bp->rx_frm_len_mask;
 
-	netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n",
-		macb_rx_ring_wrap(bp, first_frag),
-		macb_rx_ring_wrap(bp, last_frag), len);
+	netdev_vdbg(bp->netdev, "macb_rx_frame frags %u - %u (len %u)\n",
+		    macb_rx_ring_wrap(bp, first_frag),
+		    macb_rx_ring_wrap(bp, last_frag), len);
 
 	/* The ethernet header starts NET_IP_ALIGN bytes into the
 	 * first buffer. Since the header is 14 bytes, this makes the
@@ -1696,9 +1696,9 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	 * the two padding bytes into the skb so that we avoid hitting
 	 * the slowpath in memcpy(), and pull them off afterwards.
 	 */
-	skb = netdev_alloc_skb(bp->dev, len + NET_IP_ALIGN);
+	skb = netdev_alloc_skb(bp->netdev, len + NET_IP_ALIGN);
 	if (!skb) {
-		bp->dev->stats.rx_dropped++;
+		bp->netdev->stats.rx_dropped++;
 		for (frag = first_frag; ; frag++) {
 			desc = macb_rx_desc(queue, frag);
 			desc->addr &= ~MACB_BIT(RX_USED);
@@ -1742,11 +1742,11 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	wmb();
 
 	__skb_pull(skb, NET_IP_ALIGN);
-	skb->protocol = eth_type_trans(skb, bp->dev);
+	skb->protocol = eth_type_trans(skb, bp->netdev);
 
-	bp->dev->stats.rx_packets++;
-	bp->dev->stats.rx_bytes += skb->len;
-	netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
+	bp->netdev->stats.rx_packets++;
+	bp->netdev->stats.rx_bytes += skb->len;
+	netdev_vdbg(bp->netdev, "received skb of length %u, csum: %08x\n",
 		    skb->len, skb->csum);
 	napi_gro_receive(napi, skb);
 
@@ -1826,7 +1826,7 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 		unsigned long flags;
 		u32 ctrl;
 
-		netdev_err(bp->dev, "RX queue corruption: reset it\n");
+		netdev_err(bp->netdev, "RX queue corruption: reset it\n");
 
 		spin_lock_irqsave(&bp->lock, flags);
 
@@ -1873,7 +1873,7 @@ static int macb_rx_poll(struct napi_struct *napi, int budget)
 
 	work_done = bp->macbgem_ops.mog_rx(queue, napi, budget);
 
-	netdev_vdbg(bp->dev, "RX poll: queue = %u, work_done = %d, budget = %d\n",
+	netdev_vdbg(bp->netdev, "RX poll: queue = %u, work_done = %d, budget = %d\n",
 		    (unsigned int)(queue - bp->queues), work_done, budget);
 
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -1892,7 +1892,7 @@ static int macb_rx_poll(struct napi_struct *napi, int budget)
 		if (macb_rx_pending(queue)) {
 			queue_writel(queue, IDR, bp->rx_intr_mask);
 			macb_queue_isr_clear(bp, queue, MACB_BIT(RCOMP));
-			netdev_vdbg(bp->dev, "poll: packets pending, reschedule\n");
+			netdev_vdbg(bp->netdev, "poll: packets pending, reschedule\n");
 			napi_schedule(napi);
 		}
 	}
@@ -1956,11 +1956,11 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 	rmb(); // ensure txubr_pending is up to date
 	if (queue->txubr_pending) {
 		queue->txubr_pending = false;
-		netdev_vdbg(bp->dev, "poll: tx restart\n");
+		netdev_vdbg(bp->netdev, "poll: tx restart\n");
 		macb_tx_restart(queue);
 	}
 
-	netdev_vdbg(bp->dev, "TX poll: queue = %u, work_done = %d, budget = %d\n",
+	netdev_vdbg(bp->netdev, "TX poll: queue = %u, work_done = %d, budget = %d\n",
 		    (unsigned int)(queue - bp->queues), work_done, budget);
 
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -1979,7 +1979,7 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 		if (macb_tx_complete_pending(queue)) {
 			queue_writel(queue, IDR, MACB_BIT(TCOMP));
 			macb_queue_isr_clear(bp, queue, MACB_BIT(TCOMP));
-			netdev_vdbg(bp->dev, "TX poll: packets pending, reschedule\n");
+			netdev_vdbg(bp->netdev, "TX poll: packets pending, reschedule\n");
 			napi_schedule(napi);
 		}
 	}
@@ -1990,7 +1990,7 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 static void macb_hresp_error_task(struct work_struct *work)
 {
 	struct macb *bp = from_work(bp, work, hresp_err_bh_work);
-	struct net_device *dev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	struct macb_queue *queue;
 	unsigned int q;
 	u32 ctrl;
@@ -2004,8 +2004,8 @@ static void macb_hresp_error_task(struct work_struct *work)
 	ctrl &= ~(MACB_BIT(RE) | MACB_BIT(TE));
 	macb_writel(bp, NCR, ctrl);
 
-	netif_tx_stop_all_queues(dev);
-	netif_carrier_off(dev);
+	netif_tx_stop_all_queues(netdev);
+	netif_carrier_off(netdev);
 
 	bp->macbgem_ops.mog_init_rings(bp);
 
@@ -2022,8 +2022,8 @@ static void macb_hresp_error_task(struct work_struct *work)
 	ctrl |= MACB_BIT(RE) | MACB_BIT(TE);
 	macb_writel(bp, NCR, ctrl);
 
-	netif_carrier_on(dev);
-	netif_tx_start_all_queues(dev);
+	netif_carrier_on(netdev);
+	netif_tx_start_all_queues(netdev);
 }
 
 static void macb_wol_interrupt(struct macb_queue *queue, u32 status)
@@ -2032,7 +2032,7 @@ static void macb_wol_interrupt(struct macb_queue *queue, u32 status)
 
 	queue_writel(queue, IDR, MACB_BIT(WOL));
 	macb_writel(bp, WOL, 0);
-	netdev_vdbg(bp->dev, "MACB WoL: queue = %u, isr = 0x%08lx\n",
+	netdev_vdbg(bp->netdev, "MACB WoL: queue = %u, isr = 0x%08lx\n",
 		    (unsigned int)(queue - bp->queues),
 		    (unsigned long)status);
 	macb_queue_isr_clear(bp, queue, MACB_BIT(WOL));
@@ -2045,7 +2045,7 @@ static void gem_wol_interrupt(struct macb_queue *queue, u32 status)
 
 	queue_writel(queue, IDR, GEM_BIT(WOL));
 	gem_writel(bp, WOL, 0);
-	netdev_vdbg(bp->dev, "GEM WoL: queue = %u, isr = 0x%08lx\n",
+	netdev_vdbg(bp->netdev, "GEM WoL: queue = %u, isr = 0x%08lx\n",
 		    (unsigned int)(queue - bp->queues),
 		    (unsigned long)status);
 	macb_queue_isr_clear(bp, queue, GEM_BIT(WOL));
@@ -2055,10 +2055,10 @@ static void gem_wol_interrupt(struct macb_queue *queue, u32 status)
 static int macb_interrupt_misc(struct macb_queue *queue, u32 status)
 {
 	struct macb *bp = queue->bp;
-	struct net_device *dev;
+	struct net_device *netdev;
 	u32 ctrl;
 
-	dev = bp->dev;
+	netdev = bp->netdev;
 
 	if (unlikely(status & (MACB_TX_ERR_FLAGS))) {
 		queue_writel(queue, IDR, MACB_TX_INT_FLAGS);
@@ -2099,7 +2099,7 @@ static int macb_interrupt_misc(struct macb_queue *queue, u32 status)
 
 	if (status & MACB_BIT(HRESP)) {
 		queue_work(system_bh_wq, &bp->hresp_err_bh_work);
-		netdev_err(dev, "DMA bus error: HRESP not OK\n");
+		netdev_err(netdev, "DMA bus error: HRESP not OK\n");
 		macb_queue_isr_clear(bp, queue, MACB_BIT(HRESP));
 	}
 
@@ -2118,7 +2118,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 {
 	struct macb_queue *queue = dev_id;
 	struct macb *bp = queue->bp;
-	struct net_device *dev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	u32 status;
 
 	status = queue_readl(queue, ISR);
@@ -2130,13 +2130,13 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 
 	while (status) {
 		/* close possible race with dev_close */
-		if (unlikely(!netif_running(dev))) {
+		if (unlikely(!netif_running(netdev))) {
 			queue_writel(queue, IDR, -1);
 			macb_queue_isr_clear(bp, queue, -1);
 			break;
 		}
 
-		netdev_vdbg(bp->dev, "queue = %u, isr = 0x%08lx\n",
+		netdev_vdbg(netdev, "queue = %u, isr = 0x%08lx\n",
 			    (unsigned int)(queue - bp->queues),
 			    (unsigned long)status);
 
@@ -2181,16 +2181,16 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 /* Polling receive - used by netconsole and other diagnostic tools
  * to allow network i/o with interrupts disabled.
  */
-static void macb_poll_controller(struct net_device *dev)
+static void macb_poll_controller(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
 
 	local_irq_save(flags);
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
-		macb_interrupt(dev->irq, queue);
+		macb_interrupt(netdev->irq, queue);
 	local_irq_restore(flags);
 }
 #endif
@@ -2277,7 +2277,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 
 	/* Should never happen */
 	if (unlikely(!tx_skb)) {
-		netdev_err(bp->dev, "BUG! empty skb!\n");
+		netdev_err(bp->netdev, "BUG! empty skb!\n");
 		return 0;
 	}
 
@@ -2328,7 +2328,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 		if (i == queue->tx_head) {
 			ctrl |= MACB_BF(TX_LSO, lso_ctrl);
 			ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
-			if ((bp->dev->features & NETIF_F_HW_CSUM) &&
+			if ((bp->netdev->features & NETIF_F_HW_CSUM) &&
 			    skb->ip_summed != CHECKSUM_PARTIAL && !lso_ctrl &&
 			    !ptp_one_step_sync(skb))
 				ctrl |= MACB_BIT(TX_NOCRC);
@@ -2352,7 +2352,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 	return 0;
 
 dma_error:
-	netdev_err(bp->dev, "TX DMA map failed\n");
+	netdev_err(bp->netdev, "TX DMA map failed\n");
 
 	for (i = queue->tx_head; i != tx_head; i++) {
 		tx_skb = macb_tx_skb(queue, i);
@@ -2364,7 +2364,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 }
 
 static netdev_features_t macb_features_check(struct sk_buff *skb,
-					     struct net_device *dev,
+					     struct net_device *netdev,
 					     netdev_features_t features)
 {
 	unsigned int nr_frags, f;
@@ -2416,7 +2416,7 @@ static inline int macb_clear_csum(struct sk_buff *skb)
 	return 0;
 }
 
-static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
+static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *netdev)
 {
 	bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) ||
 		      skb_is_nonlinear(*skb);
@@ -2425,7 +2425,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
 	struct sk_buff *nskb;
 	u32 fcs;
 
-	if (!(ndev->features & NETIF_F_HW_CSUM) ||
+	if (!(netdev->features & NETIF_F_HW_CSUM) ||
 	    !((*skb)->ip_summed != CHECKSUM_PARTIAL) ||
 	    skb_shinfo(*skb)->gso_size || ptp_one_step_sync(*skb))
 		return 0;
@@ -2467,10 +2467,11 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
 	return 0;
 }
 
-static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
+				   struct net_device *netdev)
 {
 	u16 queue_index = skb_get_queue_mapping(skb);
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue = &bp->queues[queue_index];
 	unsigned int desc_cnt, nr_frags, frag_size, f;
 	unsigned int hdrlen;
@@ -2483,7 +2484,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return ret;
 	}
 
-	if (macb_pad_and_fcs(&skb, dev)) {
+	if (macb_pad_and_fcs(&skb, netdev)) {
 		dev_kfree_skb_any(skb);
 		return ret;
 	}
@@ -2502,7 +2503,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		else
 			hdrlen = skb_tcp_all_headers(skb);
 		if (skb_headlen(skb) < hdrlen) {
-			netdev_err(bp->dev, "Error - LSO headers fragmented!!!\n");
+			netdev_err(bp->netdev, "Error - LSO headers fragmented!!!\n");
 			/* if this is required, would need to copy to single buffer */
 			return NETDEV_TX_BUSY;
 		}
@@ -2510,7 +2511,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		hdrlen = umin(skb_headlen(skb), bp->max_tx_length);
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
-	netdev_vdbg(bp->dev,
+	netdev_vdbg(bp->netdev,
 		    "start_xmit: queue %hu len %u head %p data %p tail %p end %p\n",
 		    queue_index, skb->len, skb->head, skb->data,
 		    skb_tail_pointer(skb), skb_end_pointer(skb));
@@ -2538,8 +2539,8 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* This is a hard error, log it. */
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
 		       bp->tx_ring_size) < desc_cnt) {
-		netif_stop_subqueue(dev, queue_index);
-		netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
+		netif_stop_subqueue(netdev, queue_index);
+		netdev_dbg(netdev, "tx_head = %u, tx_tail = %u\n",
 			   queue->tx_head, queue->tx_tail);
 		ret = NETDEV_TX_BUSY;
 		goto unlock;
@@ -2554,7 +2555,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Make newly initialized descriptor visible to hardware */
 	wmb();
 	skb_tx_timestamp(skb);
-	netdev_tx_sent_queue(netdev_get_tx_queue(bp->dev, queue_index),
+	netdev_tx_sent_queue(netdev_get_tx_queue(bp->netdev, queue_index),
 			     skb->len);
 
 	spin_lock(&bp->lock);
@@ -2563,7 +2564,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	spin_unlock(&bp->lock);
 
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
-		netif_stop_subqueue(dev, queue_index);
+		netif_stop_subqueue(netdev, queue_index);
 
 unlock:
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
@@ -2579,7 +2580,7 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 		bp->rx_buffer_size = MIN(size, RX_BUFFER_MAX);
 
 		if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) {
-			netdev_dbg(bp->dev,
+			netdev_dbg(bp->netdev,
 				   "RX buffer must be multiple of %d bytes, expanding\n",
 				   RX_BUFFER_MULTIPLE);
 			bp->rx_buffer_size =
@@ -2587,8 +2588,8 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 		}
 	}
 
-	netdev_dbg(bp->dev, "mtu [%u] rx_buffer_size [%zu]\n",
-		   bp->dev->mtu, bp->rx_buffer_size);
+	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%zu]\n",
+		   bp->netdev->mtu, bp->rx_buffer_size);
 }
 
 static void gem_free_rx_buffers(struct macb *bp)
@@ -2687,7 +2688,7 @@ static int gem_alloc_rx_buffers(struct macb *bp)
 		if (!queue->rx_skbuff)
 			return -ENOMEM;
 		else
-			netdev_dbg(bp->dev,
+			netdev_dbg(bp->netdev,
 				   "Allocated %d RX struct sk_buff entries at %p\n",
 				   bp->rx_ring_size, queue->rx_skbuff);
 	}
@@ -2705,7 +2706,7 @@ static int macb_alloc_rx_buffers(struct macb *bp)
 	if (!queue->rx_buffers)
 		return -ENOMEM;
 
-	netdev_dbg(bp->dev,
+	netdev_dbg(bp->netdev,
 		   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
 		   size, (unsigned long)queue->rx_buffers_dma, queue->rx_buffers);
 	return 0;
@@ -2731,14 +2732,14 @@ static int macb_alloc_consistent(struct macb *bp)
 	tx = dma_alloc_coherent(dev, size, &tx_dma, GFP_KERNEL);
 	if (!tx || upper_32_bits(tx_dma) != upper_32_bits(tx_dma + size - 1))
 		goto out_err;
-	netdev_dbg(bp->dev, "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n",
+	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n",
 		   size, bp->num_queues, (unsigned long)tx_dma, tx);
 
 	size = bp->num_queues * macb_rx_ring_size_per_queue(bp);
 	rx = dma_alloc_coherent(dev, size, &rx_dma, GFP_KERNEL);
 	if (!rx || upper_32_bits(rx_dma) != upper_32_bits(rx_dma + size - 1))
 		goto out_err;
-	netdev_dbg(bp->dev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
+	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
 		   size, bp->num_queues, (unsigned long)rx_dma, rx);
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
@@ -2966,7 +2967,7 @@ static void macb_configure_dma(struct macb *bp)
 		else
 			dmacfg |= GEM_BIT(ENDIA_DESC); /* CPU in big endian */
 
-		if (bp->dev->features & NETIF_F_HW_CSUM)
+		if (bp->netdev->features & NETIF_F_HW_CSUM)
 			dmacfg |= GEM_BIT(TXCOEN);
 		else
 			dmacfg &= ~GEM_BIT(TXCOEN);
@@ -2976,7 +2977,7 @@ static void macb_configure_dma(struct macb *bp)
 			dmacfg |= GEM_BIT(ADDR64);
 		if (macb_dma_ptp(bp))
 			dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT);
-		netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
+		netdev_dbg(bp->netdev, "Cadence configure DMA with 0x%08x\n",
 			   dmacfg);
 		gem_writel(bp, DMACFG, dmacfg);
 	}
@@ -3000,11 +3001,11 @@ static void macb_init_hw(struct macb *bp)
 		config |= MACB_BIT(JFRAME);	/* Enable jumbo frames */
 	else
 		config |= MACB_BIT(BIG);	/* Receive oversized frames */
-	if (bp->dev->flags & IFF_PROMISC)
+	if (bp->netdev->flags & IFF_PROMISC)
 		config |= MACB_BIT(CAF);	/* Copy All Frames */
-	else if (macb_is_gem(bp) && bp->dev->features & NETIF_F_RXCSUM)
+	else if (macb_is_gem(bp) && bp->netdev->features & NETIF_F_RXCSUM)
 		config |= GEM_BIT(RXCOEN);
-	if (!(bp->dev->flags & IFF_BROADCAST))
+	if (!(bp->netdev->flags & IFF_BROADCAST))
 		config |= MACB_BIT(NBC);	/* No BroadCast */
 	config |= macb_dbw(bp);
 	macb_writel(bp, NCFGR, config);
@@ -3078,17 +3079,17 @@ static int hash_get_index(__u8 *addr)
 }
 
 /* Add multicast addresses to the internal multicast-hash table. */
-static void macb_sethashtable(struct net_device *dev)
+static void macb_sethashtable(struct net_device *netdev)
 {
 	struct netdev_hw_addr *ha;
 	unsigned long mc_filter[2];
 	unsigned int bitnr;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	mc_filter[0] = 0;
 	mc_filter[1] = 0;
 
-	netdev_for_each_mc_addr(ha, dev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		bitnr = hash_get_index(ha->addr);
 		mc_filter[bitnr >> 5] |= 1 << (bitnr & 31);
 	}
@@ -3098,14 +3099,14 @@ static void macb_sethashtable(struct net_device *dev)
 }
 
 /* Enable/Disable promiscuous and multicast modes. */
-static void macb_set_rx_mode(struct net_device *dev)
+static void macb_set_rx_mode(struct net_device *netdev)
 {
 	unsigned long cfg;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	cfg = macb_readl(bp, NCFGR);
 
-	if (dev->flags & IFF_PROMISC) {
+	if (netdev->flags & IFF_PROMISC) {
 		/* Enable promiscuous mode */
 		cfg |= MACB_BIT(CAF);
 
@@ -3117,20 +3118,20 @@ static void macb_set_rx_mode(struct net_device *dev)
 		cfg &= ~MACB_BIT(CAF);
 
 		/* Enable RX checksum offload only if requested */
-		if (macb_is_gem(bp) && dev->features & NETIF_F_RXCSUM)
+		if (macb_is_gem(bp) && netdev->features & NETIF_F_RXCSUM)
 			cfg |= GEM_BIT(RXCOEN);
 	}
 
-	if (dev->flags & IFF_ALLMULTI) {
+	if (netdev->flags & IFF_ALLMULTI) {
 		/* Enable all multicast mode */
 		macb_or_gem_writel(bp, HRB, -1);
 		macb_or_gem_writel(bp, HRT, -1);
 		cfg |= MACB_BIT(NCFGR_MTI);
-	} else if (!netdev_mc_empty(dev)) {
+	} else if (!netdev_mc_empty(netdev)) {
 		/* Enable specific multicasts */
-		macb_sethashtable(dev);
+		macb_sethashtable(netdev);
 		cfg |= MACB_BIT(NCFGR_MTI);
-	} else if (dev->flags & (~IFF_ALLMULTI)) {
+	} else if (netdev->flags & (~IFF_ALLMULTI)) {
 		/* Disable all multicast mode */
 		macb_or_gem_writel(bp, HRB, 0);
 		macb_or_gem_writel(bp, HRT, 0);
@@ -3140,15 +3141,15 @@ static void macb_set_rx_mode(struct net_device *dev)
 	macb_writel(bp, NCFGR, cfg);
 }
 
-static int macb_open(struct net_device *dev)
+static int macb_open(struct net_device *netdev)
 {
-	size_t bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
-	struct macb *bp = netdev_priv(dev);
+	size_t bufsz = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int q;
 	int err;
 
-	netdev_dbg(bp->dev, "open\n");
+	netdev_dbg(bp->netdev, "open\n");
 
 	err = pm_runtime_resume_and_get(&bp->pdev->dev);
 	if (err < 0)
@@ -3159,7 +3160,7 @@ static int macb_open(struct net_device *dev)
 
 	err = macb_alloc_consistent(bp);
 	if (err) {
-		netdev_err(dev, "Unable to allocate DMA memory (error %d)\n",
+		netdev_err(netdev, "Unable to allocate DMA memory (error %d)\n",
 			   err);
 		goto pm_exit;
 	}
@@ -3186,10 +3187,10 @@ static int macb_open(struct net_device *dev)
 	if (err)
 		goto phy_off;
 
-	netif_tx_start_all_queues(dev);
+	netif_tx_start_all_queues(netdev);
 
 	if (bp->ptp_info)
-		bp->ptp_info->ptp_init(dev);
+		bp->ptp_info->ptp_init(netdev);
 
 	return 0;
 
@@ -3208,19 +3209,19 @@ static int macb_open(struct net_device *dev)
 	return err;
 }
 
-static int macb_close(struct net_device *dev)
+static int macb_close(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
 
-	netif_tx_stop_all_queues(dev);
+	netif_tx_stop_all_queues(netdev);
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		napi_disable(&queue->napi_rx);
 		napi_disable(&queue->napi_tx);
-		netdev_tx_reset_queue(netdev_get_tx_queue(dev, q));
+		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, q));
 	}
 
 	cancel_delayed_work_sync(&bp->tx_lpi_work);
@@ -3232,38 +3233,38 @@ static int macb_close(struct net_device *dev)
 
 	spin_lock_irqsave(&bp->lock, flags);
 	macb_reset_hw(bp);
-	netif_carrier_off(dev);
+	netif_carrier_off(netdev);
 	spin_unlock_irqrestore(&bp->lock, flags);
 
 	macb_free_consistent(bp);
 
 	if (bp->ptp_info)
-		bp->ptp_info->ptp_remove(dev);
+		bp->ptp_info->ptp_remove(netdev);
 
 	pm_runtime_put(&bp->pdev->dev);
 
 	return 0;
 }
 
-static int macb_change_mtu(struct net_device *dev, int new_mtu)
+static int macb_change_mtu(struct net_device *netdev, int new_mtu)
 {
-	if (netif_running(dev))
+	if (netif_running(netdev))
 		return -EBUSY;
 
-	WRITE_ONCE(dev->mtu, new_mtu);
+	WRITE_ONCE(netdev->mtu, new_mtu);
 
 	return 0;
 }
 
-static int macb_set_mac_addr(struct net_device *dev, void *addr)
+static int macb_set_mac_addr(struct net_device *netdev, void *addr)
 {
 	int err;
 
-	err = eth_mac_addr(dev, addr);
+	err = eth_mac_addr(netdev, addr);
 	if (err < 0)
 		return err;
 
-	macb_set_hwaddr(netdev_priv(dev));
+	macb_set_hwaddr(netdev_priv(netdev));
 	return 0;
 }
 
@@ -3301,7 +3302,7 @@ static void gem_get_stats(struct macb *bp, struct rtnl_link_stats64 *nstat)
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
-	if (netif_running(bp->dev))
+	if (netif_running(bp->netdev))
 		gem_update_stats(bp);
 
 	nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors +
@@ -3334,10 +3335,10 @@ static void gem_get_stats(struct macb *bp, struct rtnl_link_stats64 *nstat)
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_ethtool_stats(struct net_device *dev,
+static void gem_get_ethtool_stats(struct net_device *netdev,
 				  struct ethtool_stats *stats, u64 *data)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	spin_lock_irq(&bp->stats_lock);
 	gem_update_stats(bp);
@@ -3346,9 +3347,9 @@ static void gem_get_ethtool_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static int gem_get_sset_count(struct net_device *dev, int sset)
+static int gem_get_sset_count(struct net_device *netdev, int sset)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	switch (sset) {
 	case ETH_SS_STATS:
@@ -3358,9 +3359,9 @@ static int gem_get_sset_count(struct net_device *dev, int sset)
 	}
 }
 
-static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p)
+static void gem_get_ethtool_strings(struct net_device *netdev, u32 sset, u8 *p)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int i;
 	unsigned int q;
@@ -3379,13 +3380,13 @@ static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p)
 	}
 }
 
-static void macb_get_stats(struct net_device *dev,
+static void macb_get_stats(struct net_device *netdev,
 			   struct rtnl_link_stats64 *nstat)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
-	netdev_stats_to_stats64(nstat, &bp->dev->stats);
+	netdev_stats_to_stats64(nstat, &bp->netdev->stats);
 	if (macb_is_gem(bp)) {
 		gem_get_stats(bp, nstat);
 		return;
@@ -3429,10 +3430,10 @@ static void macb_get_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void macb_get_pause_stats(struct net_device *dev,
+static void macb_get_pause_stats(struct net_device *netdev,
 				 struct ethtool_pause_stats *pause_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3442,10 +3443,10 @@ static void macb_get_pause_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_pause_stats(struct net_device *dev,
+static void gem_get_pause_stats(struct net_device *netdev,
 				struct ethtool_pause_stats *pause_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3455,10 +3456,10 @@ static void gem_get_pause_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void macb_get_eth_mac_stats(struct net_device *dev,
+static void macb_get_eth_mac_stats(struct net_device *netdev,
 				   struct ethtool_eth_mac_stats *mac_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3480,10 +3481,10 @@ static void macb_get_eth_mac_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_eth_mac_stats(struct net_device *dev,
+static void gem_get_eth_mac_stats(struct net_device *netdev,
 				  struct ethtool_eth_mac_stats *mac_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3513,10 +3514,10 @@ static void gem_get_eth_mac_stats(struct net_device *dev,
 }
 
 /* TODO: Report SQE test errors when added to phy_stats */
-static void macb_get_eth_phy_stats(struct net_device *dev,
+static void macb_get_eth_phy_stats(struct net_device *netdev,
 				   struct ethtool_eth_phy_stats *phy_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3525,10 +3526,10 @@ static void macb_get_eth_phy_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void gem_get_eth_phy_stats(struct net_device *dev,
+static void gem_get_eth_phy_stats(struct net_device *netdev,
 				  struct ethtool_eth_phy_stats *phy_stats)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3537,11 +3538,11 @@ static void gem_get_eth_phy_stats(struct net_device *dev,
 	spin_unlock_irq(&bp->stats_lock);
 }
 
-static void macb_get_rmon_stats(struct net_device *dev,
+static void macb_get_rmon_stats(struct net_device *netdev,
 				struct ethtool_rmon_stats *rmon_stats,
 				const struct ethtool_rmon_hist_range **ranges)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3563,11 +3564,11 @@ static const struct ethtool_rmon_hist_range gem_rmon_ranges[] = {
 	{ },
 };
 
-static void gem_get_rmon_stats(struct net_device *dev,
+static void gem_get_rmon_stats(struct net_device *netdev,
 			       struct ethtool_rmon_stats *rmon_stats,
 			       const struct ethtool_rmon_hist_range **ranges)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 
 	spin_lock_irq(&bp->stats_lock);
@@ -3598,10 +3599,10 @@ static int macb_get_regs_len(struct net_device *netdev)
 	return MACB_GREGS_NBR * sizeof(u32);
 }
 
-static void macb_get_regs(struct net_device *dev, struct ethtool_regs *regs,
+static void macb_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 			  void *p)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	unsigned int tail, head;
 	u32 *regs_buff = p;
 
@@ -3718,16 +3719,16 @@ static int macb_set_ringparam(struct net_device *netdev,
 		return 0;
 	}
 
-	if (netif_running(bp->dev)) {
+	if (netif_running(bp->netdev)) {
 		reset = 1;
-		macb_close(bp->dev);
+		macb_close(bp->netdev);
 	}
 
 	bp->rx_ring_size = new_rx_size;
 	bp->tx_ring_size = new_tx_size;
 
 	if (reset)
-		macb_open(bp->dev);
+		macb_open(bp->netdev);
 
 	return 0;
 }
@@ -3754,13 +3755,13 @@ static s32 gem_get_ptp_max_adj(void)
 	return 64000000;
 }
 
-static int gem_get_ts_info(struct net_device *dev,
+static int gem_get_ts_info(struct net_device *netdev,
 			   struct kernel_ethtool_ts_info *info)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	if (!macb_dma_ptp(bp)) {
-		ethtool_op_get_ts_info(dev, info);
+		ethtool_op_get_ts_info(netdev, info);
 		return 0;
 	}
 
@@ -3807,7 +3808,7 @@ static int macb_get_ts_info(struct net_device *netdev,
 
 static void gem_enable_flow_filters(struct macb *bp, bool enable)
 {
-	struct net_device *netdev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	struct ethtool_rx_fs_item *item;
 	u32 t2_scr;
 	int num_t2_scr;
@@ -4137,16 +4138,16 @@ static const struct ethtool_ops macb_ethtool_ops = {
 	.set_ringparam		= macb_set_ringparam,
 };
 
-static int macb_get_eee(struct net_device *dev, struct ethtool_keee *eee)
+static int macb_get_eee(struct net_device *netdev, struct ethtool_keee *eee)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	return phylink_ethtool_get_eee(bp->phylink, eee);
 }
 
-static int macb_set_eee(struct net_device *dev, struct ethtool_keee *eee)
+static int macb_set_eee(struct net_device *netdev, struct ethtool_keee *eee)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	return phylink_ethtool_set_eee(bp->phylink, eee);
 }
@@ -4177,43 +4178,43 @@ static const struct ethtool_ops gem_ethtool_ops = {
 	.set_eee		= macb_set_eee,
 };
 
-static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int macb_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
-	if (!netif_running(dev))
+	if (!netif_running(netdev))
 		return -EINVAL;
 
 	return phylink_mii_ioctl(bp->phylink, rq, cmd);
 }
 
-static int macb_hwtstamp_get(struct net_device *dev,
+static int macb_hwtstamp_get(struct net_device *netdev,
 			     struct kernel_hwtstamp_config *cfg)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
-	if (!netif_running(dev))
+	if (!netif_running(netdev))
 		return -EINVAL;
 
 	if (!bp->ptp_info)
 		return -EOPNOTSUPP;
 
-	return bp->ptp_info->get_hwtst(dev, cfg);
+	return bp->ptp_info->get_hwtst(netdev, cfg);
 }
 
-static int macb_hwtstamp_set(struct net_device *dev,
+static int macb_hwtstamp_set(struct net_device *netdev,
 			     struct kernel_hwtstamp_config *cfg,
 			     struct netlink_ext_ack *extack)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
-	if (!netif_running(dev))
+	if (!netif_running(netdev))
 		return -EINVAL;
 
 	if (!bp->ptp_info)
 		return -EOPNOTSUPP;
 
-	return bp->ptp_info->set_hwtst(dev, cfg, extack);
+	return bp->ptp_info->set_hwtst(netdev, cfg, extack);
 }
 
 static inline void macb_set_txcsum_feature(struct macb *bp,
@@ -4236,7 +4237,7 @@ static inline void macb_set_txcsum_feature(struct macb *bp,
 static inline void macb_set_rxcsum_feature(struct macb *bp,
 					   netdev_features_t features)
 {
-	struct net_device *netdev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	u32 val;
 
 	if (!macb_is_gem(bp))
@@ -4283,7 +4284,7 @@ static int macb_set_features(struct net_device *netdev,
 
 static void macb_restore_features(struct macb *bp)
 {
-	struct net_device *netdev = bp->dev;
+	struct net_device *netdev = bp->netdev;
 	netdev_features_t features = netdev->features;
 	struct ethtool_rx_fs_item *item;
 
@@ -4300,14 +4301,14 @@ static void macb_restore_features(struct macb *bp)
 	macb_set_rxflow_feature(bp, features);
 }
 
-static int macb_taprio_setup_replace(struct net_device *ndev,
+static int macb_taprio_setup_replace(struct net_device *netdev,
 				     struct tc_taprio_qopt_offload *conf)
 {
 	u64 total_on_time = 0, start_time_sec = 0, start_time = conf->base_time;
 	u32 configured_queues = 0, speed = 0, start_time_nsec;
 	struct macb_queue_enst_config *enst_queue;
 	struct tc_taprio_sched_entry *entry;
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 	struct ethtool_link_ksettings kset;
 	struct macb_queue *queue;
 	u32 queue_mask;
@@ -4316,13 +4317,13 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	int err;
 
 	if (conf->num_entries > bp->num_queues) {
-		netdev_err(ndev, "Too many TAPRIO entries: %zu > %d queues\n",
+		netdev_err(netdev, "Too many TAPRIO entries: %zu > %d queues\n",
 			   conf->num_entries, bp->num_queues);
 		return -EINVAL;
 	}
 
 	if (conf->base_time < 0) {
-		netdev_err(ndev, "Invalid base_time: must be 0 or positive, got %lld\n",
+		netdev_err(netdev, "Invalid base_time: must be 0 or positive, got %lld\n",
 			   conf->base_time);
 		return -ERANGE;
 	}
@@ -4330,13 +4331,13 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	/* Get the current link speed */
 	err = phylink_ethtool_ksettings_get(bp->phylink, &kset);
 	if (unlikely(err)) {
-		netdev_err(ndev, "Failed to get link settings: %d\n", err);
+		netdev_err(netdev, "Failed to get link settings: %d\n", err);
 		return err;
 	}
 
 	speed = kset.base.speed;
 	if (unlikely(speed <= 0)) {
-		netdev_err(ndev, "Invalid speed: %d\n", speed);
+		netdev_err(netdev, "Invalid speed: %d\n", speed);
 		return -EINVAL;
 	}
 
@@ -4349,7 +4350,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		entry = &conf->entries[i];
 
 		if (entry->command != TC_TAPRIO_CMD_SET_GATES) {
-			netdev_err(ndev, "Entry %zu: unsupported command %d\n",
+			netdev_err(netdev, "Entry %zu: unsupported command %d\n",
 				   i, entry->command);
 			err = -EOPNOTSUPP;
 			goto cleanup;
@@ -4357,7 +4358,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* Validate gate_mask: must be nonzero, single queue, and within range */
 		if (!is_power_of_2(entry->gate_mask)) {
-			netdev_err(ndev, "Entry %zu: gate_mask 0x%x is not a power of 2 (only one queue per entry allowed)\n",
+			netdev_err(netdev, "Entry %zu: gate_mask 0x%x is not a power of 2 (only one queue per entry allowed)\n",
 				   i, entry->gate_mask);
 			err = -EINVAL;
 			goto cleanup;
@@ -4366,7 +4367,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		/* gate_mask must not select queues outside the valid queues */
 		queue_id = order_base_2(entry->gate_mask);
 		if (queue_id >= bp->num_queues) {
-			netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
+			netdev_err(netdev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n",
 				   i, entry->gate_mask, bp->num_queues);
 			err = -EINVAL;
 			goto cleanup;
@@ -4376,7 +4377,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		start_time_sec = start_time;
 		start_time_nsec = do_div(start_time_sec, NSEC_PER_SEC);
 		if (start_time_sec > GENMASK(GEM_START_TIME_SEC_SIZE - 1, 0)) {
-			netdev_err(ndev, "Entry %zu: Start time %llu s exceeds hardware limit\n",
+			netdev_err(netdev, "Entry %zu: Start time %llu s exceeds hardware limit\n",
 				   i, start_time_sec);
 			err = -ERANGE;
 			goto cleanup;
@@ -4384,7 +4385,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* Check for on time limit */
 		if (entry->interval > enst_max_hw_interval(speed)) {
-			netdev_err(ndev, "Entry %zu: interval %u ns exceeds hardware limit %llu ns\n",
+			netdev_err(netdev, "Entry %zu: interval %u ns exceeds hardware limit %llu ns\n",
 				   i, entry->interval, enst_max_hw_interval(speed));
 			err = -ERANGE;
 			goto cleanup;
@@ -4392,7 +4393,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 		/* Check for off time limit*/
 		if ((conf->cycle_time - entry->interval) > enst_max_hw_interval(speed)) {
-			netdev_err(ndev, "Entry %zu: off_time %llu ns exceeds hardware limit %llu ns\n",
+			netdev_err(netdev, "Entry %zu: off_time %llu ns exceeds hardware limit %llu ns\n",
 				   i, conf->cycle_time - entry->interval,
 				   enst_max_hw_interval(speed));
 			err = -ERANGE;
@@ -4415,13 +4416,13 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 
 	/* Check total interval doesn't exceed cycle time */
 	if (total_on_time > conf->cycle_time) {
-		netdev_err(ndev, "Total ON %llu ns exceeds cycle time %llu ns\n",
+		netdev_err(netdev, "Total ON %llu ns exceeds cycle time %llu ns\n",
 			   total_on_time, conf->cycle_time);
 		err = -EINVAL;
 		goto cleanup;
 	}
 
-	netdev_dbg(ndev, "TAPRIO setup: %zu entries, base_time=%lld ns, cycle_time=%llu ns\n",
+	netdev_dbg(netdev, "TAPRIO setup: %zu entries, base_time=%lld ns, cycle_time=%llu ns\n",
 		   conf->num_entries, conf->base_time, conf->cycle_time);
 
 	/* All validations passed - proceed with hardware configuration */
@@ -4446,7 +4447,7 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 		gem_writel(bp, ENST_CONTROL, configured_queues);
 	}
 
-	netdev_info(ndev, "TAPRIO configuration completed successfully: %zu entries, %d queues configured\n",
+	netdev_info(netdev, "TAPRIO configuration completed successfully: %zu entries, %d queues configured\n",
 		    conf->num_entries, hweight32(configured_queues));
 
 cleanup:
@@ -4454,14 +4455,14 @@ static int macb_taprio_setup_replace(struct net_device *ndev,
 	return err;
 }
 
-static void macb_taprio_destroy(struct net_device *ndev)
+static void macb_taprio_destroy(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	u32 queue_mask;
 	unsigned int q;
 
-	netdev_reset_tc(ndev);
+	netdev_reset_tc(netdev);
 	queue_mask = BIT_U32(bp->num_queues) - 1;
 
 	scoped_guard(spinlock_irqsave, &bp->lock) {
@@ -4476,30 +4477,30 @@ static void macb_taprio_destroy(struct net_device *ndev)
 			queue_writel(queue, ENST_OFF_TIME, 0);
 		}
 	}
-	netdev_info(ndev, "TAPRIO destroy: All gates disabled\n");
+	netdev_info(netdev, "TAPRIO destroy: All gates disabled\n");
 }
 
-static int macb_setup_taprio(struct net_device *ndev,
+static int macb_setup_taprio(struct net_device *netdev,
 			     struct tc_taprio_qopt_offload *taprio)
 {
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 	int err = 0;
 
-	if (unlikely(!(ndev->hw_features & NETIF_F_HW_TC)))
+	if (unlikely(!(netdev->hw_features & NETIF_F_HW_TC)))
 		return -EOPNOTSUPP;
 
 	/* Check if Device is in runtime suspend */
 	if (unlikely(pm_runtime_suspended(&bp->pdev->dev))) {
-		netdev_err(ndev, "Device is in runtime suspend\n");
+		netdev_err(netdev, "Device is in runtime suspend\n");
 		return -EOPNOTSUPP;
 	}
 
 	switch (taprio->cmd) {
 	case TAPRIO_CMD_REPLACE:
-		err = macb_taprio_setup_replace(ndev, taprio);
+		err = macb_taprio_setup_replace(netdev, taprio);
 		break;
 	case TAPRIO_CMD_DESTROY:
-		macb_taprio_destroy(ndev);
+		macb_taprio_destroy(netdev);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -4508,15 +4509,15 @@ static int macb_setup_taprio(struct net_device *ndev,
 	return err;
 }
 
-static int macb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+static int macb_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 			 void *type_data)
 {
-	if (!dev || !type_data)
+	if (!netdev || !type_data)
 		return -EINVAL;
 
 	switch (type) {
 	case TC_SETUP_QDISC_TAPRIO:
-		return macb_setup_taprio(dev, type_data);
+		return macb_setup_taprio(netdev, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -4724,9 +4725,9 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
 
 static int macb_init_dflt(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
+	struct net_device *netdev = platform_get_drvdata(pdev);
 	unsigned int hw_q, q;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	int err;
 	u32 val, reg;
@@ -4742,8 +4743,8 @@ static int macb_init_dflt(struct platform_device *pdev)
 		queue = &bp->queues[q];
 		queue->bp = bp;
 		spin_lock_init(&queue->tx_ptr_lock);
-		netif_napi_add(dev, &queue->napi_rx, macb_rx_poll);
-		netif_napi_add_tx(dev, &queue->napi_tx, macb_tx_poll);
+		netif_napi_add(netdev, &queue->napi_rx, macb_rx_poll);
+		netif_napi_add_tx(netdev, &queue->napi_tx, macb_tx_poll);
 		if (hw_q) {
 			queue->ISR  = GEM_ISR(hw_q - 1);
 			queue->IER  = GEM_IER(hw_q - 1);
@@ -4773,7 +4774,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 		 */
 		queue->irq = platform_get_irq(pdev, q);
 		err = devm_request_irq(&pdev->dev, queue->irq, macb_interrupt,
-				       IRQF_SHARED, dev->name, queue);
+				       IRQF_SHARED, netdev->name, queue);
 		if (err) {
 			dev_err(&pdev->dev,
 				"Unable to request IRQ %d (error %d)\n",
@@ -4785,7 +4786,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 		q++;
 	}
 
-	dev->netdev_ops = &macb_netdev_ops;
+	netdev->netdev_ops = &macb_netdev_ops;
 
 	/* setup appropriated routines according to adapter type */
 	if (macb_is_gem(bp)) {
@@ -4793,39 +4794,39 @@ static int macb_init_dflt(struct platform_device *pdev)
 		bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers;
 		bp->macbgem_ops.mog_init_rings = gem_init_rings;
 		bp->macbgem_ops.mog_rx = gem_rx;
-		dev->ethtool_ops = &gem_ethtool_ops;
+		netdev->ethtool_ops = &gem_ethtool_ops;
 	} else {
 		bp->macbgem_ops.mog_alloc_rx_buffers = macb_alloc_rx_buffers;
 		bp->macbgem_ops.mog_free_rx_buffers = macb_free_rx_buffers;
 		bp->macbgem_ops.mog_init_rings = macb_init_rings;
 		bp->macbgem_ops.mog_rx = macb_rx;
-		dev->ethtool_ops = &macb_ethtool_ops;
+		netdev->ethtool_ops = &macb_ethtool_ops;
 	}
 
-	netdev_sw_irq_coalesce_default_on(dev);
+	netdev_sw_irq_coalesce_default_on(netdev);
 
-	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
 	/* Set features */
-	dev->hw_features = NETIF_F_SG;
+	netdev->hw_features = NETIF_F_SG;
 
 	/* Check LSO capability; runtime detection can be overridden by a cap
 	 * flag if the hardware is known to be buggy
 	 */
 	if (!(bp->caps & MACB_CAPS_NO_LSO) &&
 	    GEM_BFEXT(PBUF_LSO, gem_readl(bp, DCFG6)))
-		dev->hw_features |= MACB_NETIF_LSO;
+		netdev->hw_features |= MACB_NETIF_LSO;
 
 	/* Checksum offload is only available on gem with packet buffer */
 	if (macb_is_gem(bp) && !(bp->caps & MACB_CAPS_FIFO_MODE))
-		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+		netdev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	if (bp->caps & MACB_CAPS_SG_DISABLED)
-		dev->hw_features &= ~NETIF_F_SG;
+		netdev->hw_features &= ~NETIF_F_SG;
 	/* Enable HW_TC if hardware supports QBV */
 	if (bp->caps & MACB_CAPS_QBV)
-		dev->hw_features |= NETIF_F_HW_TC;
+		netdev->hw_features |= NETIF_F_HW_TC;
 
-	dev->features = dev->hw_features;
+	netdev->features = netdev->hw_features;
 
 	/* Check RX Flow Filters support.
 	 * Max Rx flows set by availability of screeners & compare regs:
@@ -4843,7 +4844,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 			reg = GEM_BFINS(ETHTCMP, (uint16_t)ETH_P_IP, reg);
 			gem_writel_n(bp, ETHT, SCRT2_ETHT, reg);
 			/* Filtering is supported in hw but don't enable it in kernel now */
-			dev->hw_features |= NETIF_F_NTUPLE;
+			netdev->hw_features |= NETIF_F_NTUPLE;
 			/* init Rx flow definitions */
 			bp->rx_fs_list.count = 0;
 			spin_lock_init(&bp->rx_fs_lock);
@@ -5053,9 +5054,9 @@ static void at91ether_stop(struct macb *lp)
 }
 
 /* Open the ethernet interface */
-static int at91ether_open(struct net_device *dev)
+static int at91ether_open(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 	u32 ctl;
 	int ret;
 
@@ -5077,7 +5078,7 @@ static int at91ether_open(struct net_device *dev)
 	if (ret)
 		goto stop;
 
-	netif_start_queue(dev);
+	netif_start_queue(netdev);
 
 	return 0;
 
@@ -5089,11 +5090,11 @@ static int at91ether_open(struct net_device *dev)
 }
 
 /* Close the interface */
-static int at91ether_close(struct net_device *dev)
+static int at91ether_close(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 
-	netif_stop_queue(dev);
+	netif_stop_queue(netdev);
 
 	phylink_stop(lp->phylink);
 	phylink_disconnect_phy(lp->phylink);
@@ -5107,14 +5108,14 @@ static int at91ether_close(struct net_device *dev)
 
 /* Transmit packet */
 static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
-					struct net_device *dev)
+					struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 
 	if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) {
 		int desc = 0;
 
-		netif_stop_queue(dev);
+		netif_stop_queue(netdev);
 
 		/* Store packet information (to free when Tx completed) */
 		lp->rm9200_txq[desc].skb = skb;
@@ -5123,8 +5124,8 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 							      skb->len, DMA_TO_DEVICE);
 		if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) {
 			dev_kfree_skb_any(skb);
-			dev->stats.tx_dropped++;
-			netdev_err(dev, "%s: DMA mapping error\n", __func__);
+			netdev->stats.tx_dropped++;
+			netdev_err(netdev, "%s: DMA mapping error\n", __func__);
 			return NETDEV_TX_OK;
 		}
 
@@ -5134,7 +5135,8 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 		macb_writel(lp, TCR, skb->len);
 
 	} else {
-		netdev_err(dev, "%s called, but device is busy!\n", __func__);
+		netdev_err(netdev, "%s called, but device is busy!\n",
+			   __func__);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -5144,9 +5146,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 /* Extract received frame from buffer descriptors and sent to upper layers.
  * (Called from interrupt context)
  */
-static void at91ether_rx(struct net_device *dev)
+static void at91ether_rx(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(dev);
+	struct macb *lp = netdev_priv(netdev);
 	struct macb_queue *q = &lp->queues[0];
 	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
@@ -5157,21 +5159,21 @@ static void at91ether_rx(struct net_device *dev)
 	while (desc->addr & MACB_BIT(RX_USED)) {
 		p_recv = q->rx_buffers + q->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
 		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
-		skb = netdev_alloc_skb(dev, pktlen + 2);
+		skb = netdev_alloc_skb(netdev, pktlen + 2);
 		if (skb) {
 			skb_reserve(skb, 2);
 			skb_put_data(skb, p_recv, pktlen);
 
-			skb->protocol = eth_type_trans(skb, dev);
-			dev->stats.rx_packets++;
-			dev->stats.rx_bytes += pktlen;
+			skb->protocol = eth_type_trans(skb, netdev);
+			netdev->stats.rx_packets++;
+			netdev->stats.rx_bytes += pktlen;
 			netif_rx(skb);
 		} else {
-			dev->stats.rx_dropped++;
+			netdev->stats.rx_dropped++;
 		}
 
 		if (desc->ctrl & MACB_BIT(RX_MHASH_MATCH))
-			dev->stats.multicast++;
+			netdev->stats.multicast++;
 
 		/* reset ownership bit */
 		desc->addr &= ~MACB_BIT(RX_USED);
@@ -5189,8 +5191,8 @@ static void at91ether_rx(struct net_device *dev)
 /* MAC interrupt handler */
 static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 {
-	struct net_device *dev = dev_id;
-	struct macb *lp = netdev_priv(dev);
+	struct net_device *netdev = dev_id;
+	struct macb *lp = netdev_priv(netdev);
 	u32 intstatus, ctl;
 	unsigned int desc;
 
@@ -5201,13 +5203,13 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 
 	/* Receive complete */
 	if (intstatus & MACB_BIT(RCOMP))
-		at91ether_rx(dev);
+		at91ether_rx(netdev);
 
 	/* Transmit complete */
 	if (intstatus & MACB_BIT(TCOMP)) {
 		/* The TCOM bit is set even if the transmission failed */
 		if (intstatus & (MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE)))
-			dev->stats.tx_errors++;
+			netdev->stats.tx_errors++;
 
 		desc = 0;
 		if (lp->rm9200_txq[desc].skb) {
@@ -5215,10 +5217,10 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 			lp->rm9200_txq[desc].skb = NULL;
 			dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping,
 					 lp->rm9200_txq[desc].size, DMA_TO_DEVICE);
-			dev->stats.tx_packets++;
-			dev->stats.tx_bytes += lp->rm9200_txq[desc].size;
+			netdev->stats.tx_packets++;
+			netdev->stats.tx_bytes += lp->rm9200_txq[desc].size;
 		}
-		netif_wake_queue(dev);
+		netif_wake_queue(netdev);
 	}
 
 	/* Work-around for EMAC Errata section 41.3.1 */
@@ -5230,18 +5232,18 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 	}
 
 	if (intstatus & MACB_BIT(ISR_ROVR))
-		netdev_err(dev, "ROVR error\n");
+		netdev_err(netdev, "ROVR error\n");
 
 	return IRQ_HANDLED;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static void at91ether_poll_controller(struct net_device *dev)
+static void at91ether_poll_controller(struct net_device *netdev)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	at91ether_interrupt(dev->irq, dev);
+	at91ether_interrupt(netdev->irq, netdev);
 	local_irq_restore(flags);
 }
 #endif
@@ -5288,17 +5290,17 @@ static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk,
 
 static int at91ether_init(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct macb *bp = netdev_priv(dev);
+	struct net_device *netdev = platform_get_drvdata(pdev);
+	struct macb *bp = netdev_priv(netdev);
 	int err;
 
 	bp->queues[0].bp = bp;
 
-	dev->netdev_ops = &at91ether_netdev_ops;
-	dev->ethtool_ops = &macb_ethtool_ops;
+	netdev->netdev_ops = &at91ether_netdev_ops;
+	netdev->ethtool_ops = &macb_ethtool_ops;
 
-	err = devm_request_irq(&pdev->dev, dev->irq, at91ether_interrupt,
-			       0, dev->name, dev);
+	err = devm_request_irq(&pdev->dev, netdev->irq, at91ether_interrupt,
+			       0, netdev->name, netdev);
 	if (err)
 		return err;
 
@@ -5427,8 +5429,8 @@ static int fu540_c000_init(struct platform_device *pdev)
 
 static int init_reset_optional(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct macb *bp = netdev_priv(dev);
+	struct net_device *netdev = platform_get_drvdata(pdev);
+	struct macb *bp = netdev_priv(netdev);
 	int ret;
 
 	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) {
@@ -5736,7 +5738,7 @@ static int macb_probe(struct platform_device *pdev)
 	const struct macb_config *macb_config;
 	struct clk *tsu_clk = NULL;
 	phy_interface_t interface;
-	struct net_device *dev;
+	struct net_device *netdev;
 	struct resource *regs;
 	u32 wtrmrk_rst_val;
 	void __iomem *mem;
@@ -5771,19 +5773,19 @@ static int macb_probe(struct platform_device *pdev)
 		goto err_disable_clocks;
 	}
 
-	dev = alloc_etherdev_mq(sizeof(*bp), num_queues);
-	if (!dev) {
+	netdev = alloc_etherdev_mq(sizeof(*bp), num_queues);
+	if (!netdev) {
 		err = -ENOMEM;
 		goto err_disable_clocks;
 	}
 
-	dev->base_addr = regs->start;
+	netdev->base_addr = regs->start;
 
-	SET_NETDEV_DEV(dev, &pdev->dev);
+	SET_NETDEV_DEV(netdev, &pdev->dev);
 
-	bp = netdev_priv(dev);
+	bp = netdev_priv(netdev);
 	bp->pdev = pdev;
-	bp->dev = dev;
+	bp->netdev = netdev;
 	bp->regs = mem;
 	bp->native_io = native_io;
 	if (native_io) {
@@ -5856,21 +5858,21 @@ static int macb_probe(struct platform_device *pdev)
 		bp->caps |= MACB_CAPS_DMA_64B;
 	}
 #endif
-	platform_set_drvdata(pdev, dev);
+	platform_set_drvdata(pdev, netdev);
 
-	dev->irq = platform_get_irq(pdev, 0);
-	if (dev->irq < 0) {
-		err = dev->irq;
+	netdev->irq = platform_get_irq(pdev, 0);
+	if (netdev->irq < 0) {
+		err = netdev->irq;
 		goto err_out_free_netdev;
 	}
 
 	/* MTU range: 68 - 1518 or 10240 */
-	dev->min_mtu = GEM_MTU_MIN_SIZE;
+	netdev->min_mtu = GEM_MTU_MIN_SIZE;
 	if ((bp->caps & MACB_CAPS_JUMBO) && bp->jumbo_max_len)
-		dev->max_mtu = MIN(bp->jumbo_max_len, RX_BUFFER_MAX) -
+		netdev->max_mtu = MIN(bp->jumbo_max_len, RX_BUFFER_MAX) -
 				ETH_HLEN - ETH_FCS_LEN;
 	else
-		dev->max_mtu = 1536 - ETH_HLEN - ETH_FCS_LEN;
+		netdev->max_mtu = 1536 - ETH_HLEN - ETH_FCS_LEN;
 
 	if (bp->caps & MACB_CAPS_BD_RD_PREFETCH) {
 		val = GEM_BFEXT(RXBD_RDBUFF, gem_readl(bp, DCFG10));
@@ -5888,7 +5890,7 @@ static int macb_probe(struct platform_device *pdev)
 	if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR)
 		bp->rx_intr_mask |= MACB_BIT(RXUBR);
 
-	err = of_get_ethdev_address(np, bp->dev);
+	err = of_get_ethdev_address(np, bp->netdev);
 	if (err == -EPROBE_DEFER)
 		goto err_out_free_netdev;
 	else if (err)
@@ -5910,9 +5912,9 @@ static int macb_probe(struct platform_device *pdev)
 	if (err)
 		goto err_out_phy_exit;
 
-	netif_carrier_off(dev);
+	netif_carrier_off(netdev);
 
-	err = register_netdev(dev);
+	err = register_netdev(netdev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
 		goto err_out_unregister_mdio;
@@ -5921,9 +5923,9 @@ static int macb_probe(struct platform_device *pdev)
 	INIT_WORK(&bp->hresp_err_bh_work, macb_hresp_error_task);
 	INIT_DELAYED_WORK(&bp->tx_lpi_work, macb_tx_lpi_work_fn);
 
-	netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
+	netdev_info(netdev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
 		    macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID),
-		    dev->base_addr, dev->irq, dev->dev_addr);
+		    netdev->base_addr, netdev->irq, netdev->dev_addr);
 
 	pm_runtime_put_autosuspend(&bp->pdev->dev);
 
@@ -5937,7 +5939,7 @@ static int macb_probe(struct platform_device *pdev)
 	phy_exit(bp->phy);
 
 err_out_free_netdev:
-	free_netdev(dev);
+	free_netdev(netdev);
 
 err_disable_clocks:
 	macb_clks_disable(pclk, hclk, tx_clk, rx_clk, tsu_clk);
@@ -5950,14 +5952,14 @@ static int macb_probe(struct platform_device *pdev)
 
 static void macb_remove(struct platform_device *pdev)
 {
-	struct net_device *dev;
+	struct net_device *netdev;
 	struct macb *bp;
 
-	dev = platform_get_drvdata(pdev);
+	netdev = platform_get_drvdata(pdev);
 
-	if (dev) {
-		bp = netdev_priv(dev);
-		unregister_netdev(dev);
+	if (netdev) {
+		bp = netdev_priv(netdev);
+		unregister_netdev(netdev);
 		phy_exit(bp->phy);
 		mdiobus_unregister(bp->mii_bus);
 		mdiobus_free(bp->mii_bus);
@@ -5969,7 +5971,7 @@ static void macb_remove(struct platform_device *pdev)
 		pm_runtime_dont_use_autosuspend(&pdev->dev);
 		pm_runtime_set_suspended(&pdev->dev);
 		phylink_destroy(bp->phylink);
-		free_netdev(dev);
+		free_netdev(netdev);
 	}
 }
 
@@ -5984,7 +5986,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
 	u32 tmp, ifa_local;
 	unsigned int q;
 
-	if (!device_may_wakeup(&bp->dev->dev))
+	if (!device_may_wakeup(&bp->netdev->dev))
 		phy_exit(bp->phy);
 
 	if (!netif_running(netdev))
@@ -5994,7 +5996,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
 		if (bp->wolopts & WAKE_ARP) {
 			/* Check for IP address in WOL ARP mode */
 			rcu_read_lock();
-			idev = __in_dev_get_rcu(bp->dev);
+			idev = __in_dev_get_rcu(bp->netdev);
 			if (idev)
 				ifa = rcu_dereference(idev->ifa_list);
 			if (!ifa) {
@@ -6096,7 +6098,7 @@ static int __maybe_unused macb_resume(struct device *dev)
 	unsigned long flags;
 	unsigned int q;
 
-	if (!device_may_wakeup(&bp->dev->dev))
+	if (!device_may_wakeup(&bp->netdev->dev))
 		phy_init(bp->phy);
 
 	if (!netif_running(netdev))
diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index b79dec17e6b0..ac009007118f 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -24,48 +24,48 @@
 #define GEM_PCLK_RATE 50000000
 #define GEM_HCLK_RATE 50000000
 
-static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int macb_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
 	int err;
-	struct platform_device *plat_dev;
+	struct platform_device *pdev;
 	struct platform_device_info plat_info;
 	struct macb_platform_data plat_data;
 	struct resource res[2];
 
 	/* enable pci device */
-	err = pcim_enable_device(pdev);
+	err = pcim_enable_device(pci);
 	if (err < 0) {
-		dev_err(&pdev->dev, "Enabling PCI device has failed: %d", err);
+		dev_err(&pci->dev, "Enabling PCI device has failed: %d", err);
 		return err;
 	}
 
-	pci_set_master(pdev);
+	pci_set_master(pci);
 
 	/* set up resources */
 	memset(res, 0x00, sizeof(struct resource) * ARRAY_SIZE(res));
-	res[0].start = pci_resource_start(pdev, 0);
-	res[0].end = pci_resource_end(pdev, 0);
+	res[0].start = pci_resource_start(pci, 0);
+	res[0].end = pci_resource_end(pci, 0);
 	res[0].name = PCI_DRIVER_NAME;
 	res[0].flags = IORESOURCE_MEM;
-	res[1].start = pci_irq_vector(pdev, 0);
+	res[1].start = pci_irq_vector(pci, 0);
 	res[1].name = PCI_DRIVER_NAME;
 	res[1].flags = IORESOURCE_IRQ;
 
-	dev_info(&pdev->dev, "EMAC physical base addr: %pa\n",
+	dev_info(&pci->dev, "EMAC physical base addr: %pa\n",
 		 &res[0].start);
 
 	/* set up macb platform data */
 	memset(&plat_data, 0, sizeof(plat_data));
 
 	/* initialize clocks */
-	plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0,
+	plat_data.pclk = clk_register_fixed_rate(&pci->dev, "pclk", NULL, 0,
 						 GEM_PCLK_RATE);
 	if (IS_ERR(plat_data.pclk)) {
 		err = PTR_ERR(plat_data.pclk);
 		goto err_pclk_register;
 	}
 
-	plat_data.hclk = clk_register_fixed_rate(&pdev->dev, "hclk", NULL, 0,
+	plat_data.hclk = clk_register_fixed_rate(&pci->dev, "hclk", NULL, 0,
 						 GEM_HCLK_RATE);
 	if (IS_ERR(plat_data.hclk)) {
 		err = PTR_ERR(plat_data.hclk);
@@ -74,24 +74,24 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* set up platform device info */
 	memset(&plat_info, 0, sizeof(plat_info));
-	plat_info.parent = &pdev->dev;
-	plat_info.fwnode = pdev->dev.fwnode;
+	plat_info.parent = &pci->dev;
+	plat_info.fwnode = pci->dev.fwnode;
 	plat_info.name = PLAT_DRIVER_NAME;
-	plat_info.id = pdev->devfn;
+	plat_info.id = pci->devfn;
 	plat_info.res = res;
 	plat_info.num_res = ARRAY_SIZE(res);
 	plat_info.data = &plat_data;
 	plat_info.size_data = sizeof(plat_data);
-	plat_info.dma_mask = pdev->dma_mask;
+	plat_info.dma_mask = pci->dma_mask;
 
 	/* register platform device */
-	plat_dev = platform_device_register_full(&plat_info);
-	if (IS_ERR(plat_dev)) {
-		err = PTR_ERR(plat_dev);
+	pdev = platform_device_register_full(&plat_info);
+	if (IS_ERR(pdev)) {
+		err = PTR_ERR(pdev);
 		goto err_plat_dev_register;
 	}
 
-	pci_set_drvdata(pdev, plat_dev);
+	pci_set_drvdata(pci, pdev);
 
 	return 0;
 
@@ -105,14 +105,14 @@ static int macb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return err;
 }
 
-static void macb_remove(struct pci_dev *pdev)
+static void macb_remove(struct pci_dev *pci)
 {
-	struct platform_device *plat_dev = pci_get_drvdata(pdev);
-	struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev);
+	struct platform_device *pdev = pci_get_drvdata(pci);
+	struct macb_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 	struct clk *pclk = plat_data->pclk;
 	struct clk *hclk = plat_data->hclk;
 
-	platform_device_unregister(plat_dev);
+	platform_device_unregister(pdev);
 	clk_unregister_fixed_rate(pclk);
 	clk_unregister_fixed_rate(hclk);
 }
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index d91f7b1aa39c..e5195d7dac1d 100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -324,9 +324,9 @@ void gem_ptp_txstamp(struct macb *bp, struct sk_buff *skb,
 	skb_tstamp_tx(skb, &shhwtstamps);
 }
 
-void gem_ptp_init(struct net_device *dev)
+void gem_ptp_init(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	bp->ptp_clock_info = gem_ptp_caps_template;
 
@@ -334,7 +334,7 @@ void gem_ptp_init(struct net_device *dev)
 	bp->tsu_rate = bp->ptp_info->get_tsu_rate(bp);
 	bp->ptp_clock_info.max_adj = bp->ptp_info->get_ptp_max_adj();
 	gem_ptp_init_timer(bp);
-	bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &dev->dev);
+	bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &netdev->dev);
 	if (IS_ERR(bp->ptp_clock)) {
 		pr_err("ptp clock register failed: %ld\n",
 			PTR_ERR(bp->ptp_clock));
@@ -353,9 +353,9 @@ void gem_ptp_init(struct net_device *dev)
 		 GEM_PTP_TIMER_NAME);
 }
 
-void gem_ptp_remove(struct net_device *ndev)
+void gem_ptp_remove(struct net_device *netdev)
 {
-	struct macb *bp = netdev_priv(ndev);
+	struct macb *bp = netdev_priv(netdev);
 
 	if (bp->ptp_clock) {
 		ptp_clock_unregister(bp->ptp_clock);
@@ -378,10 +378,10 @@ static int gem_ptp_set_ts_mode(struct macb *bp,
 	return 0;
 }
 
-int gem_get_hwtst(struct net_device *dev,
+int gem_get_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config)
 {
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 
 	*tstamp_config = bp->tstamp_config;
 	if (!macb_dma_ptp(bp))
@@ -402,13 +402,13 @@ static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable)
 		macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE));
 }
 
-int gem_set_hwtst(struct net_device *dev,
+int gem_set_hwtst(struct net_device *netdev,
 		  struct kernel_hwtstamp_config *tstamp_config,
 		  struct netlink_ext_ack *extack)
 {
 	enum macb_bd_control tx_bd_control = TSTAMP_DISABLED;
 	enum macb_bd_control rx_bd_control = TSTAMP_DISABLED;
-	struct macb *bp = netdev_priv(dev);
+	struct macb *bp = netdev_priv(netdev);
 	u32 regval;
 
 	if (!macb_dma_ptp(bp))

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 02/14] net: macb: unify `struct macb *` naming convention
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

For historical reasons, MACB has both:

   struct macb *bp;
   struct macb *lp; // used in at91ether functions

Use only the former.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 176 ++++++++++++++++---------------
 1 file changed, 91 insertions(+), 85 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 896d481e0f95..a8a7df615d25 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4938,71 +4938,72 @@ static const struct macb_usrio_config at91_default_usrio = {
 
 static struct sifive_fu540_macb_mgmt *mgmt;
 
-static int at91ether_alloc_coherent(struct macb *lp)
+static int at91ether_alloc_coherent(struct macb *bp)
 {
-	struct macb_queue *q = &lp->queues[0];
+	struct macb_queue *queue = &bp->queues[0];
 
-	q->rx_ring = dma_alloc_coherent(&lp->pdev->dev,
-					 (AT91ETHER_MAX_RX_DESCR *
-					  macb_dma_desc_get_size(lp)),
-					 &q->rx_ring_dma, GFP_KERNEL);
-	if (!q->rx_ring)
+	queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev,
+					    (AT91ETHER_MAX_RX_DESCR *
+					     macb_dma_desc_get_size(bp)),
+					    &queue->rx_ring_dma, GFP_KERNEL);
+	if (!queue->rx_ring)
 		return -ENOMEM;
 
-	q->rx_buffers = dma_alloc_coherent(&lp->pdev->dev,
-					    AT91ETHER_MAX_RX_DESCR *
-					    AT91ETHER_MAX_RBUFF_SZ,
-					    &q->rx_buffers_dma, GFP_KERNEL);
-	if (!q->rx_buffers) {
-		dma_free_coherent(&lp->pdev->dev,
+	queue->rx_buffers = dma_alloc_coherent(&bp->pdev->dev,
+					       AT91ETHER_MAX_RX_DESCR *
+					       AT91ETHER_MAX_RBUFF_SZ,
+					       &queue->rx_buffers_dma,
+					       GFP_KERNEL);
+	if (!queue->rx_buffers) {
+		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
-				  macb_dma_desc_get_size(lp),
-				  q->rx_ring, q->rx_ring_dma);
-		q->rx_ring = NULL;
+				  macb_dma_desc_get_size(bp),
+				  queue->rx_ring, queue->rx_ring_dma);
+		queue->rx_ring = NULL;
 		return -ENOMEM;
 	}
 
 	return 0;
 }
 
-static void at91ether_free_coherent(struct macb *lp)
+static void at91ether_free_coherent(struct macb *bp)
 {
-	struct macb_queue *q = &lp->queues[0];
+	struct macb_queue *queue = &bp->queues[0];
 
-	if (q->rx_ring) {
-		dma_free_coherent(&lp->pdev->dev,
+	if (queue->rx_ring) {
+		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
-				  macb_dma_desc_get_size(lp),
-				  q->rx_ring, q->rx_ring_dma);
-		q->rx_ring = NULL;
+				  macb_dma_desc_get_size(bp),
+				  queue->rx_ring, queue->rx_ring_dma);
+		queue->rx_ring = NULL;
 	}
 
-	if (q->rx_buffers) {
-		dma_free_coherent(&lp->pdev->dev,
+	if (queue->rx_buffers) {
+		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
 				  AT91ETHER_MAX_RBUFF_SZ,
-				  q->rx_buffers, q->rx_buffers_dma);
-		q->rx_buffers = NULL;
+				  queue->rx_buffers, queue->rx_buffers_dma);
+		queue->rx_buffers = NULL;
 	}
 }
 
 /* Initialize and start the Receiver and Transmit subsystems */
-static int at91ether_start(struct macb *lp)
+static int at91ether_start(struct macb *bp)
 {
-	struct macb_queue *q = &lp->queues[0];
+	struct macb_queue *queue = &bp->queues[0];
 	struct macb_dma_desc *desc;
 	dma_addr_t addr;
 	u32 ctl;
 	int i, ret;
 
-	ret = at91ether_alloc_coherent(lp);
+	ret = at91ether_alloc_coherent(bp);
 	if (ret)
 		return ret;
 
-	addr = q->rx_buffers_dma;
+	addr = queue->rx_buffers_dma;
 	for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) {
-		desc = macb_rx_desc(q, i);
-		macb_set_addr(lp, desc, addr);
+		desc = macb_rx_desc(queue, i);
+		macb_set_addr(bp, desc, addr);
 		desc->ctrl = 0;
 		addr += AT91ETHER_MAX_RBUFF_SZ;
 	}
@@ -5011,17 +5012,17 @@ static int at91ether_start(struct macb *lp)
 	desc->addr |= MACB_BIT(RX_WRAP);
 
 	/* Reset buffer index */
-	q->rx_tail = 0;
+	queue->rx_tail = 0;
 
 	/* Program address of descriptor list in Rx Buffer Queue register */
-	macb_writel(lp, RBQP, q->rx_ring_dma);
+	macb_writel(bp, RBQP, queue->rx_ring_dma);
 
 	/* Enable Receive and Transmit */
-	ctl = macb_readl(lp, NCR);
-	macb_writel(lp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE));
+	ctl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE));
 
 	/* Enable MAC interrupts */
-	macb_writel(lp, IER, MACB_BIT(RCOMP)	|
+	macb_writel(bp, IER, MACB_BIT(RCOMP)	|
 			     MACB_BIT(RXUBR)	|
 			     MACB_BIT(ISR_TUND)	|
 			     MACB_BIT(ISR_RLE)	|
@@ -5032,12 +5033,12 @@ static int at91ether_start(struct macb *lp)
 	return 0;
 }
 
-static void at91ether_stop(struct macb *lp)
+static void at91ether_stop(struct macb *bp)
 {
 	u32 ctl;
 
 	/* Disable MAC interrupts */
-	macb_writel(lp, IDR, MACB_BIT(RCOMP)	|
+	macb_writel(bp, IDR, MACB_BIT(RCOMP)	|
 			     MACB_BIT(RXUBR)	|
 			     MACB_BIT(ISR_TUND)	|
 			     MACB_BIT(ISR_RLE)	|
@@ -5046,35 +5047,35 @@ static void at91ether_stop(struct macb *lp)
 			     MACB_BIT(HRESP));
 
 	/* Disable Receiver and Transmitter */
-	ctl = macb_readl(lp, NCR);
-	macb_writel(lp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE)));
+	ctl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE)));
 
 	/* Free resources. */
-	at91ether_free_coherent(lp);
+	at91ether_free_coherent(bp);
 }
 
 /* Open the ethernet interface */
 static int at91ether_open(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
 	u32 ctl;
 	int ret;
 
-	ret = pm_runtime_resume_and_get(&lp->pdev->dev);
+	ret = pm_runtime_resume_and_get(&bp->pdev->dev);
 	if (ret < 0)
 		return ret;
 
 	/* Clear internal statistics */
-	ctl = macb_readl(lp, NCR);
-	macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT));
+	ctl = macb_readl(bp, NCR);
+	macb_writel(bp, NCR, ctl | MACB_BIT(CLRSTAT));
 
-	macb_set_hwaddr(lp);
+	macb_set_hwaddr(bp);
 
-	ret = at91ether_start(lp);
+	ret = at91ether_start(bp);
 	if (ret)
 		goto pm_exit;
 
-	ret = macb_phylink_connect(lp);
+	ret = macb_phylink_connect(bp);
 	if (ret)
 		goto stop;
 
@@ -5083,25 +5084,25 @@ static int at91ether_open(struct net_device *netdev)
 	return 0;
 
 stop:
-	at91ether_stop(lp);
+	at91ether_stop(bp);
 pm_exit:
-	pm_runtime_put_sync(&lp->pdev->dev);
+	pm_runtime_put_sync(&bp->pdev->dev);
 	return ret;
 }
 
 /* Close the interface */
 static int at91ether_close(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
 
 	netif_stop_queue(netdev);
 
-	phylink_stop(lp->phylink);
-	phylink_disconnect_phy(lp->phylink);
+	phylink_stop(bp->phylink);
+	phylink_disconnect_phy(bp->phylink);
 
-	at91ether_stop(lp);
+	at91ether_stop(bp);
 
-	pm_runtime_put(&lp->pdev->dev);
+	pm_runtime_put(&bp->pdev->dev);
 
 	return 0;
 }
@@ -5110,19 +5111,21 @@ static int at91ether_close(struct net_device *netdev)
 static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 					struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
+	struct device *dev = &bp->pdev->dev;
 
-	if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) {
+	if (macb_readl(bp, TSR) & MACB_BIT(RM9200_BNQ)) {
 		int desc = 0;
 
 		netif_stop_queue(netdev);
 
 		/* Store packet information (to free when Tx completed) */
-		lp->rm9200_txq[desc].skb = skb;
-		lp->rm9200_txq[desc].size = skb->len;
-		lp->rm9200_txq[desc].mapping = dma_map_single(&lp->pdev->dev, skb->data,
-							      skb->len, DMA_TO_DEVICE);
-		if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) {
+		bp->rm9200_txq[desc].skb = skb;
+		bp->rm9200_txq[desc].size = skb->len;
+		bp->rm9200_txq[desc].mapping = dma_map_single(dev, skb->data,
+							      skb->len,
+							      DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, bp->rm9200_txq[desc].mapping)) {
 			dev_kfree_skb_any(skb);
 			netdev->stats.tx_dropped++;
 			netdev_err(netdev, "%s: DMA mapping error\n", __func__);
@@ -5130,9 +5133,9 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 		}
 
 		/* Set address of the data in the Transmit Address register */
-		macb_writel(lp, TAR, lp->rm9200_txq[desc].mapping);
+		macb_writel(bp, TAR, bp->rm9200_txq[desc].mapping);
 		/* Set length of the packet in the Transmit Control register */
-		macb_writel(lp, TCR, skb->len);
+		macb_writel(bp, TCR, skb->len);
 
 	} else {
 		netdev_err(netdev, "%s called, but device is busy!\n",
@@ -5148,16 +5151,17 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
  */
 static void at91ether_rx(struct net_device *netdev)
 {
-	struct macb *lp = netdev_priv(netdev);
-	struct macb_queue *q = &lp->queues[0];
+	struct macb *bp = netdev_priv(netdev);
+	struct macb_queue *queue = &bp->queues[0];
 	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
 	struct sk_buff *skb;
 	unsigned int pktlen;
 
-	desc = macb_rx_desc(q, q->rx_tail);
+	desc = macb_rx_desc(queue, queue->rx_tail);
 	while (desc->addr & MACB_BIT(RX_USED)) {
-		p_recv = q->rx_buffers + q->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
+		p_recv = queue->rx_buffers +
+			 queue->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
 		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
 		skb = netdev_alloc_skb(netdev, pktlen + 2);
 		if (skb) {
@@ -5179,12 +5183,12 @@ static void at91ether_rx(struct net_device *netdev)
 		desc->addr &= ~MACB_BIT(RX_USED);
 
 		/* wrap after last buffer */
-		if (q->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
-			q->rx_tail = 0;
+		if (queue->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
+			queue->rx_tail = 0;
 		else
-			q->rx_tail++;
+			queue->rx_tail++;
 
-		desc = macb_rx_desc(q, q->rx_tail);
+		desc = macb_rx_desc(queue, queue->rx_tail);
 	}
 }
 
@@ -5192,14 +5196,14 @@ static void at91ether_rx(struct net_device *netdev)
 static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 {
 	struct net_device *netdev = dev_id;
-	struct macb *lp = netdev_priv(netdev);
+	struct macb *bp = netdev_priv(netdev);
 	u32 intstatus, ctl;
 	unsigned int desc;
 
 	/* MAC Interrupt Status register indicates what interrupts are pending.
 	 * It is automatically cleared once read.
 	 */
-	intstatus = macb_readl(lp, ISR);
+	intstatus = macb_readl(bp, ISR);
 
 	/* Receive complete */
 	if (intstatus & MACB_BIT(RCOMP))
@@ -5212,23 +5216,25 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 			netdev->stats.tx_errors++;
 
 		desc = 0;
-		if (lp->rm9200_txq[desc].skb) {
-			dev_consume_skb_irq(lp->rm9200_txq[desc].skb);
-			lp->rm9200_txq[desc].skb = NULL;
-			dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping,
-					 lp->rm9200_txq[desc].size, DMA_TO_DEVICE);
+		if (bp->rm9200_txq[desc].skb) {
+			dev_consume_skb_irq(bp->rm9200_txq[desc].skb);
+			bp->rm9200_txq[desc].skb = NULL;
+			dma_unmap_single(&bp->pdev->dev,
+					 bp->rm9200_txq[desc].mapping,
+					 bp->rm9200_txq[desc].size,
+					 DMA_TO_DEVICE);
 			netdev->stats.tx_packets++;
-			netdev->stats.tx_bytes += lp->rm9200_txq[desc].size;
+			netdev->stats.tx_bytes += bp->rm9200_txq[desc].size;
 		}
 		netif_wake_queue(netdev);
 	}
 
 	/* Work-around for EMAC Errata section 41.3.1 */
 	if (intstatus & MACB_BIT(RXUBR)) {
-		ctl = macb_readl(lp, NCR);
-		macb_writel(lp, NCR, ctl & ~MACB_BIT(RE));
+		ctl = macb_readl(bp, NCR);
+		macb_writel(bp, NCR, ctl & ~MACB_BIT(RE));
 		wmb();
-		macb_writel(lp, NCR, ctl | MACB_BIT(RE));
+		macb_writel(bp, NCR, ctl | MACB_BIT(RE));
 	}
 
 	if (intstatus & MACB_BIT(ISR_ROVR))

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 03/14] net: macb: unify queue index variable naming convention and types
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Variables are named q or queue_index. Types are int, unsigned int, u32
and u16. Use `unsigned int q` everywhere.

Skip over taprio functions. They use `u8 queue_id` which fits with the
`struct macb_queue_enst_config` field. Using `queue_id` everywhere
would be too verbose.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index a8a7df615d25..b0e70f6ce305 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -877,7 +877,7 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 static void gem_shuffle_tx_rings(struct macb *bp)
 {
 	struct macb_queue *queue;
-	int q;
+	unsigned int q;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; q++, queue++)
 		gem_shuffle_tx_one_ring(queue);
@@ -1258,7 +1258,7 @@ static void macb_tx_error_task(struct work_struct *work)
 						      tx_error_task);
 	bool			halt_timeout = false;
 	struct macb		*bp = queue->bp;
-	u32			queue_index;
+	unsigned int		q;
 	u32			packets = 0;
 	u32			bytes = 0;
 	struct macb_tx_skb	*tx_skb;
@@ -1267,9 +1267,9 @@ static void macb_tx_error_task(struct work_struct *work)
 	unsigned int		tail;
 	unsigned long		flags;
 
-	queue_index = queue - bp->queues;
+	q = queue - bp->queues;
 	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
-		    queue_index, queue->tx_tail, queue->tx_head);
+		    q, queue->tx_tail, queue->tx_head);
 
 	/* Prevent the queue NAPI TX poll from running, as it calls
 	 * macb_tx_complete(), which in turn may call netif_wake_subqueue().
@@ -1342,7 +1342,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		macb_tx_unmap(bp, tx_skb, 0);
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, q),
 				  packets, bytes);
 
 	/* Set end of TX queue */
@@ -1407,7 +1407,7 @@ static bool ptp_one_step_sync(struct sk_buff *skb)
 static int macb_tx_complete(struct macb_queue *queue, int budget)
 {
 	struct macb *bp = queue->bp;
-	u16 queue_index = queue - bp->queues;
+	unsigned int q = queue - bp->queues;
 	unsigned long flags;
 	unsigned int tail;
 	unsigned int head;
@@ -1469,14 +1469,14 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 		}
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, queue_index),
+	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, q),
 				  packets, bytes);
 
 	queue->tx_tail = tail;
-	if (__netif_subqueue_stopped(bp->netdev, queue_index) &&
+	if (__netif_subqueue_stopped(bp->netdev, q) &&
 	    CIRC_CNT(queue->tx_head, queue->tx_tail,
 		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
-		netif_wake_subqueue(bp->netdev, queue_index);
+		netif_wake_subqueue(bp->netdev, q);
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
 
 	if (packets)
@@ -2470,10 +2470,10 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *netdev)
 static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 				   struct net_device *netdev)
 {
-	u16 queue_index = skb_get_queue_mapping(skb);
 	struct macb *bp = netdev_priv(netdev);
-	struct macb_queue *queue = &bp->queues[queue_index];
+	unsigned int q = skb_get_queue_mapping(skb);
 	unsigned int desc_cnt, nr_frags, frag_size, f;
+	struct macb_queue *queue = &bp->queues[q];
 	unsigned int hdrlen;
 	unsigned long flags;
 	bool is_lso;
@@ -2513,7 +2513,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
 	netdev_vdbg(bp->netdev,
 		    "start_xmit: queue %hu len %u head %p data %p tail %p end %p\n",
-		    queue_index, skb->len, skb->head, skb->data,
+		    q, skb->len, skb->head, skb->data,
 		    skb_tail_pointer(skb), skb_end_pointer(skb));
 	print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1,
 		       skb->data, 16, true);
@@ -2539,7 +2539,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	/* This is a hard error, log it. */
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
 		       bp->tx_ring_size) < desc_cnt) {
-		netif_stop_subqueue(netdev, queue_index);
+		netif_stop_subqueue(netdev, q);
 		netdev_dbg(netdev, "tx_head = %u, tx_tail = %u\n",
 			   queue->tx_head, queue->tx_tail);
 		ret = NETDEV_TX_BUSY;
@@ -2555,7 +2555,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	/* Make newly initialized descriptor visible to hardware */
 	wmb();
 	skb_tx_timestamp(skb);
-	netdev_tx_sent_queue(netdev_get_tx_queue(bp->netdev, queue_index),
+	netdev_tx_sent_queue(netdev_get_tx_queue(bp->netdev, q),
 			     skb->len);
 
 	spin_lock(&bp->lock);
@@ -2564,7 +2564,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	spin_unlock(&bp->lock);
 
 	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
-		netif_stop_subqueue(netdev, queue_index);
+		netif_stop_subqueue(netdev, q);
 
 unlock:
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 04/14] net: macb: enforce reverse christmas tree (RCT) convention
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Enforce the reverse christmas tree convention in the following functions:

   macb_tx_error_task()
   gem_rx_refill()
   gem_rx()
   macb_rx_frame()
   macb_init_rx_ring()
   macb_rx()
   macb_rx_pending()
   macb_start_xmit()

The goal is to minimise unrelated diff in future patches.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 61 ++++++++++++++++----------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index b0e70f6ce305..c5d8e8f835ba 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1254,20 +1254,19 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc)
 
 static void macb_tx_error_task(struct work_struct *work)
 {
-	struct macb_queue	*queue = container_of(work, struct macb_queue,
-						      tx_error_task);
-	bool			halt_timeout = false;
-	struct macb		*bp = queue->bp;
-	unsigned int		q;
-	u32			packets = 0;
-	u32			bytes = 0;
-	struct macb_tx_skb	*tx_skb;
-	struct macb_dma_desc	*desc;
-	struct sk_buff		*skb;
-	unsigned int		tail;
-	unsigned long		flags;
+	struct macb_queue *queue = container_of(work, struct macb_queue,
+						tx_error_task);
+	unsigned int q = queue - queue->bp->queues;
+	struct macb *bp = queue->bp;
+	struct macb_tx_skb *tx_skb;
+	struct macb_dma_desc *desc;
+	bool halt_timeout = false;
+	struct sk_buff *skb;
+	unsigned long flags;
+	unsigned int tail;
+	u32 packets = 0;
+	u32 bytes = 0;
 
-	q = queue - bp->queues;
 	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
 		    q, queue->tx_tail, queue->tx_head);
 
@@ -1487,11 +1486,11 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 
 static void gem_rx_refill(struct macb_queue *queue)
 {
-	unsigned int		entry;
-	struct sk_buff		*skb;
-	dma_addr_t		paddr;
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
+	struct sk_buff *skb;
+	unsigned int entry;
+	dma_addr_t paddr;
 
 	while (CIRC_SPACE(queue->rx_prepared_head, queue->rx_tail,
 			bp->rx_ring_size) > 0) {
@@ -1584,11 +1583,11 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		  int budget)
 {
 	struct macb *bp = queue->bp;
-	unsigned int		len;
-	unsigned int		entry;
-	struct sk_buff		*skb;
-	struct macb_dma_desc	*desc;
-	int			count = 0;
+	struct macb_dma_desc *desc;
+	struct sk_buff *skb;
+	unsigned int entry;
+	unsigned int len;
+	int count = 0;
 
 	while (count < budget) {
 		u32 ctrl;
@@ -1674,12 +1673,12 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 			 unsigned int first_frag, unsigned int last_frag)
 {
-	unsigned int len;
-	unsigned int frag;
+	struct macb *bp = queue->bp;
+	struct macb_dma_desc *desc;
 	unsigned int offset;
 	struct sk_buff *skb;
-	struct macb_dma_desc *desc;
-	struct macb *bp = queue->bp;
+	unsigned int frag;
+	unsigned int len;
 
 	desc = macb_rx_desc(queue, last_frag);
 	len = desc->ctrl & bp->rx_frm_len_mask;
@@ -1755,9 +1754,9 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 
 static inline void macb_init_rx_ring(struct macb_queue *queue)
 {
+	struct macb_dma_desc *desc = NULL;
 	struct macb *bp = queue->bp;
 	dma_addr_t addr;
-	struct macb_dma_desc *desc = NULL;
 	int i;
 
 	addr = queue->rx_buffers_dma;
@@ -1776,9 +1775,9 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 {
 	struct macb *bp = queue->bp;
 	bool reset_rx_queue = false;
-	int received = 0;
-	unsigned int tail;
 	int first_frag = -1;
+	unsigned int tail;
+	int received = 0;
 
 	for (tail = queue->rx_tail; budget > 0; tail++) {
 		struct macb_dma_desc *desc = macb_rx_desc(queue, tail);
@@ -1853,8 +1852,8 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 static bool macb_rx_pending(struct macb_queue *queue)
 {
 	struct macb *bp = queue->bp;
-	unsigned int		entry;
-	struct macb_dma_desc	*desc;
+	struct macb_dma_desc *desc;
+	unsigned int entry;
 
 	entry = macb_rx_ring_wrap(bp, queue->rx_tail);
 	desc = macb_rx_desc(queue, entry);
@@ -2474,10 +2473,10 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	unsigned int q = skb_get_queue_mapping(skb);
 	unsigned int desc_cnt, nr_frags, frag_size, f;
 	struct macb_queue *queue = &bp->queues[q];
+	netdev_tx_t ret = NETDEV_TX_OK;
 	unsigned int hdrlen;
 	unsigned long flags;
 	bool is_lso;
-	netdev_tx_t ret = NETDEV_TX_OK;
 
 	if (macb_clear_csum(skb)) {
 		dev_kfree_skb_any(skb);

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 05/14] net: macb: allocate tieoff descriptor once across device lifetime
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

The tieoff descriptor is an RX DMA descriptor ring of size one. It gets
configured onto queues for Wake-on-LAN during system-wide suspend when
hardware does not support disabling individual queues
(MACB_CAPS_QUEUE_DISABLE).

The MACB/GEM driver allocates it alongside the main RX ring
inside macb_alloc_consistent() at open. It is freed by
macb_free_consistent() at close.

Change to allocate once at probe and free on probe failure or device
removal. This makes the tieoff descriptor lifetime much longer,
avoiding repeating coherent buffer allocation on each open/close cycle.

Main benefit: we dissociate its lifetime from the main ring's lifetime.
That way there is less work to do on resource (re)allocation. This
currently happens on close/open, but will soon also happen on context
swap operations (set_ringparam, change_mtu, set_channels, etc).

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 75 +++++++++++++++++---------------
 1 file changed, 41 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index c5d8e8f835ba..ec030801ed68 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2653,12 +2653,6 @@ static void macb_free_consistent(struct macb *bp)
 	unsigned int q;
 	size_t size;
 
-	if (bp->rx_ring_tieoff) {
-		dma_free_coherent(dev, macb_dma_desc_get_size(bp),
-				  bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma);
-		bp->rx_ring_tieoff = NULL;
-	}
-
 	bp->macbgem_ops.mog_free_rx_buffers(bp);
 
 	size = bp->num_queues * macb_tx_ring_size_per_queue(bp);
@@ -2756,16 +2750,6 @@ static int macb_alloc_consistent(struct macb *bp)
 	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
 		goto out_err;
 
-	/* Required for tie off descriptor for PM cases */
-	if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) {
-		bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
-							macb_dma_desc_get_size(bp),
-							&bp->rx_ring_tieoff_dma,
-							GFP_KERNEL);
-		if (!bp->rx_ring_tieoff)
-			goto out_err;
-	}
-
 	return 0;
 
 out_err:
@@ -2773,19 +2757,6 @@ static int macb_alloc_consistent(struct macb *bp)
 	return -ENOMEM;
 }
 
-static void macb_init_tieoff(struct macb *bp)
-{
-	struct macb_dma_desc *desc = bp->rx_ring_tieoff;
-
-	if (bp->caps & MACB_CAPS_QUEUE_DISABLE)
-		return;
-	/* Setup a wrapping descriptor with no free slots
-	 * (WRAP and USED) to tie off/disable unused RX queues.
-	 */
-	macb_set_addr(bp, desc, MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
-	desc->ctrl = 0;
-}
-
 static void gem_init_rx_ring(struct macb_queue *queue)
 {
 	queue->rx_tail = 0;
@@ -2813,8 +2784,6 @@ static void gem_init_rings(struct macb *bp)
 
 		gem_init_rx_ring(queue);
 	}
-
-	macb_init_tieoff(bp);
 }
 
 static void macb_init_rings(struct macb *bp)
@@ -2832,8 +2801,6 @@ static void macb_init_rings(struct macb *bp)
 	bp->queues[0].tx_head = 0;
 	bp->queues[0].tx_tail = 0;
 	desc->ctrl |= MACB_BIT(TX_WRAP);
-
-	macb_init_tieoff(bp);
 }
 
 static void macb_reset_hw(struct macb *bp)
@@ -5510,6 +5477,38 @@ static int eyeq5_init(struct platform_device *pdev)
 	return ret;
 }
 
+static int macb_alloc_tieoff(struct macb *bp)
+{
+	/* Tieoff is a workaround in case HW cannot disable queues, for PM. */
+	if (bp->caps & MACB_CAPS_QUEUE_DISABLE)
+		return 0;
+
+	bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
+						macb_dma_desc_get_size(bp),
+						&bp->rx_ring_tieoff_dma,
+						GFP_KERNEL);
+	if (!bp->rx_ring_tieoff)
+		return -ENOMEM;
+
+	macb_set_addr(bp, bp->rx_ring_tieoff,
+		      MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
+
+	bp->rx_ring_tieoff->ctrl = 0;
+
+	return 0;
+}
+
+static void macb_free_tieoff(struct macb *bp)
+{
+	if (!bp->rx_ring_tieoff)
+		return;
+
+	dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp),
+			  bp->rx_ring_tieoff,
+			  bp->rx_ring_tieoff_dma);
+	bp->rx_ring_tieoff = NULL;
+}
+
 static const struct macb_usrio_config mpfs_usrio = {
 	.tsu_source = 0,
 };
@@ -5919,10 +5918,14 @@ static int macb_probe(struct platform_device *pdev)
 
 	netif_carrier_off(netdev);
 
+	err = macb_alloc_tieoff(bp);
+	if (err)
+		goto err_out_unregister_mdio;
+
 	err = register_netdev(netdev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-		goto err_out_unregister_mdio;
+		goto err_out_free_tieoff;
 	}
 
 	INIT_WORK(&bp->hresp_err_bh_work, macb_hresp_error_task);
@@ -5936,6 +5939,9 @@ static int macb_probe(struct platform_device *pdev)
 
 	return 0;
 
+err_out_free_tieoff:
+	macb_free_tieoff(bp);
+
 err_out_unregister_mdio:
 	mdiobus_unregister(bp->mii_bus);
 	mdiobus_free(bp->mii_bus);
@@ -5965,6 +5971,7 @@ static void macb_remove(struct platform_device *pdev)
 	if (netdev) {
 		bp = netdev_priv(netdev);
 		unregister_netdev(netdev);
+		macb_free_tieoff(bp);
 		phy_exit(bp->phy);
 		mdiobus_unregister(bp->mii_bus);
 		mdiobus_free(bp->mii_bus);

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 06/14] net: macb: introduce macb_context struct for buffer management
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Whenever an operation requires buffer realloc, we close the interface,
update parameters and reopen. To improve reliability under memory
pressure, we should rather alloc new buffers, reconfigure HW and free
old buffers. This requires MACB to support having multiple "contexts"
in parallel.

Introduce this concept by adding the macb_context struct, which owns all
queue buffers and the associated parameters. We do not yet support
multiple contexts in parallel, because all functions access bp->ctx
(the currently active context) directly.

Steps:

 - Introduce `struct macb_context` and its children `struct macb_rxq`
   and `struct macb_txq`. Context fields are stolen from `struct macb`
   and rxq/txq fields are from `struct macb_queue`.

   Making it two separate structs per queue simplifies accesses: we grab
   a txq/rxq local variable and access fields like txq->head instead of
   queue->tx_head. It also anecdotally improves data locality.

 - macb_init_dflt() / macb_get_ringparam() do not access
   bp->ctx->{rx,tx}_ring_size as they will/might run while the interface
   is offline and ctx is NULL. Instead, introduce
   bp->configured_{rx,tx}_ring_size, which get updated on user requests.

 - macb_open() starts by allocating bp->ctx. It gets freed in the
   open error codepath or by macb_close().

 - Guided by compile errors, update all codepaths. Most diff is changing
   `queue->tx_*` to `txq->*` and `queue->rx_*` to `rxq->*`, with a new
   local variable. Also rx_buffer_size / rx_ring_size / tx_ring_size
   move from bp to bp->ctx.

   Introduce two helper functions, macb_txq() and macb_rxq(), to convert
   macb_queue pointers into the matching per-context queue structs.

 - macb_get_regs() is tweaked to support being run while the interface
   is offline (and context is NULL). Use default values of zero and
   override them only if context is present.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |  49 +++-
 drivers/net/ethernet/cadence/macb_main.c | 454 ++++++++++++++++++-------------
 2 files changed, 305 insertions(+), 198 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 9857df5b57f0..452b2c8f8641 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1272,21 +1272,10 @@ struct macb_queue {
 
 	/* Lock to protect tx_head and tx_tail */
 	spinlock_t		tx_ptr_lock;
-	unsigned int		tx_head, tx_tail;
-	struct macb_dma_desc	*tx_ring;
-	struct macb_tx_skb	*tx_skb;
-	dma_addr_t		tx_ring_dma;
 	struct work_struct	tx_error_task;
 	bool			txubr_pending;
 	struct napi_struct	napi_tx;
 
-	dma_addr_t		rx_ring_dma;
-	dma_addr_t		rx_buffers_dma;
-	unsigned int		rx_tail;
-	unsigned int		rx_prepared_head;
-	struct macb_dma_desc	*rx_ring;
-	struct sk_buff		**rx_skbuff;
-	void			*rx_buffers;
 	struct napi_struct	napi_rx;
 	struct queue_stats stats;
 };
@@ -1301,6 +1290,32 @@ struct ethtool_rx_fs_list {
 	unsigned int count;
 };
 
+struct macb_rxq {
+	struct macb_dma_desc	*ring;		/* MACB & GEM */
+	dma_addr_t		ring_dma;	/* MACB & GEM */
+	unsigned int		tail;		/* MACB & GEM */
+	unsigned int		prepared_head;	/* GEM */
+	struct sk_buff		**skbuff;	/* GEM */
+	dma_addr_t		buffers_dma;	/* MACB */
+	void			*buffers;	/* MACB */
+};
+
+struct macb_txq {
+	unsigned int		head;
+	unsigned int		tail;
+	struct macb_dma_desc	*ring;
+	dma_addr_t		ring_dma;
+	struct macb_tx_skb	*skb;
+};
+
+struct macb_context {
+	unsigned int		rx_buffer_size;
+	unsigned int		rx_ring_size;
+	unsigned int		tx_ring_size;
+	struct macb_rxq		rxq[MACB_MAX_QUEUES];
+	struct macb_txq		txq[MACB_MAX_QUEUES];
+};
+
 struct macb {
 	void __iomem		*regs;
 	bool			native_io;
@@ -1309,12 +1324,16 @@ struct macb {
 	u32	(*macb_reg_readl)(struct macb *bp, int offset);
 	void	(*macb_reg_writel)(struct macb *bp, int offset, u32 value);
 
+	/*
+	 * Context stores all its parameters.
+	 * But we must remember them across closure.
+	 */
+	unsigned int		configured_rx_ring_size;
+	unsigned int		configured_tx_ring_size;
+	struct macb_context	*ctx;
+
 	struct macb_dma_desc	*rx_ring_tieoff;
 	dma_addr_t		rx_ring_tieoff_dma;
-	size_t			rx_buffer_size;
-
-	unsigned int		rx_ring_size;
-	unsigned int		tx_ring_size;
 
 	unsigned int		num_queues;
 	struct macb_queue	queues[MACB_MAX_QUEUES];
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index ec030801ed68..3e596cbe9fc8 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -61,7 +61,7 @@ struct sifive_fu540_macb_mgmt {
 #define MAX_TX_RING_SIZE	4096
 
 /* level of occupied TX descriptors under which we wake up TX process */
-#define MACB_TX_WAKEUP_THRESH(bp)	(3 * (bp)->tx_ring_size / 4)
+#define MACB_TX_WAKEUP_THRESH(bp)	(3 * (bp)->ctx->tx_ring_size / 4)
 
 #define MACB_RX_INT_FLAGS	(MACB_BIT(RCOMP) | MACB_BIT(ISR_ROVR))
 #define MACB_TX_ERR_FLAGS	(MACB_BIT(ISR_TUND)			\
@@ -152,48 +152,73 @@ static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_d
 /* Ring buffer accessors */
 static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index)
 {
-	return index & (bp->tx_ring_size - 1);
+	return index & (bp->ctx->tx_ring_size - 1);
+}
+
+static struct macb_txq *macb_txq(struct macb_queue *queue)
+{
+	struct macb *bp = queue->bp;
+	unsigned int q = queue - bp->queues;
+
+	return &bp->ctx->txq[q];
+}
+
+static struct macb_rxq *macb_rxq(struct macb_queue *queue)
+{
+	struct macb *bp = queue->bp;
+	unsigned int q = queue - bp->queues;
+
+	return &bp->ctx->rxq[q];
 }
 
 static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue,
 					  unsigned int index)
 {
+	struct macb_txq *txq = macb_txq(queue);
+
 	index = macb_tx_ring_wrap(queue->bp, index);
 	index = macb_adj_dma_desc_idx(queue->bp, index);
-	return &queue->tx_ring[index];
+	return &txq->ring[index];
 }
 
 static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue,
 				       unsigned int index)
 {
-	return &queue->tx_skb[macb_tx_ring_wrap(queue->bp, index)];
+	struct macb_txq *txq = macb_txq(queue);
+
+	return &txq->skb[macb_tx_ring_wrap(queue->bp, index)];
 }
 
 static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index)
 {
+	struct macb_txq *txq = macb_txq(queue);
 	dma_addr_t offset;
 
 	offset = macb_tx_ring_wrap(queue->bp, index) *
 			macb_dma_desc_get_size(queue->bp);
 
-	return queue->tx_ring_dma + offset;
+	return txq->ring_dma + offset;
 }
 
 static unsigned int macb_rx_ring_wrap(struct macb *bp, unsigned int index)
 {
-	return index & (bp->rx_ring_size - 1);
+	return index & (bp->ctx->rx_ring_size - 1);
 }
 
 static struct macb_dma_desc *macb_rx_desc(struct macb_queue *queue, unsigned int index)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
+
 	index = macb_rx_ring_wrap(queue->bp, index);
 	index = macb_adj_dma_desc_idx(queue->bp, index);
-	return &queue->rx_ring[index];
+	return &rxq->ring[index];
 }
 
 static void *macb_rx_buffer(struct macb_queue *queue, unsigned int index)
 {
-	return queue->rx_buffers + queue->bp->rx_buffer_size *
+	struct macb_rxq *rxq = macb_rxq(queue);
+
+	return rxq->buffers + queue->bp->ctx->rx_buffer_size *
 	       macb_rx_ring_wrap(queue->bp, index);
 }
 
@@ -463,19 +488,23 @@ static int macb_mdio_write_c45(struct mii_bus *bus, int mii_id,
 static void macb_init_buffers(struct macb *bp)
 {
 	struct macb_queue *queue;
+	struct macb_rxq *rxq;
+	struct macb_txq *txq;
 	unsigned int q;
 
 	/* Single register for all queues' high 32 bits. */
 	if (macb_dma64(bp)) {
-		macb_writel(bp, RBQPH,
-			    upper_32_bits(bp->queues[0].rx_ring_dma));
-		macb_writel(bp, TBQPH,
-			    upper_32_bits(bp->queues[0].tx_ring_dma));
+		rxq = &bp->ctx->rxq[0];
+		txq = &bp->ctx->txq[0];
+		macb_writel(bp, RBQPH, upper_32_bits(rxq->ring_dma));
+		macb_writel(bp, TBQPH, upper_32_bits(txq->ring_dma));
 	}
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
-		queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
+		rxq = &bp->ctx->rxq[q];
+		txq = &bp->ctx->txq[q];
+		queue_writel(queue, RBQP, lower_32_bits(rxq->ring_dma));
+		queue_writel(queue, TBQP, lower_32_bits(txq->ring_dma));
 	}
 }
 
@@ -648,11 +677,12 @@ static bool macb_tx_lpi_set(struct macb *bp, bool enable)
 
 static bool macb_tx_all_queues_idle(struct macb *bp)
 {
-	struct macb_queue *queue;
+	struct macb_txq *txq;
 	unsigned int q;
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		if (READ_ONCE(queue->tx_head) != READ_ONCE(queue->tx_tail))
+	for (q = 0; q < bp->num_queues; ++q) {
+		txq = &bp->ctx->txq[q];
+		if (READ_ONCE(txq->head) != READ_ONCE(txq->tail))
 			return false;
 	}
 	return true;
@@ -799,6 +829,7 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 	struct macb_tx_skb tx_skb, *skb_curr, *skb_next;
 	struct macb_dma_desc *desc_curr, *desc_next;
 	unsigned int i, cycles, shift, curr, next;
+	struct macb_txq *txq = macb_txq(queue);
 	struct macb *bp = queue->bp;
 	unsigned char desc[24];
 	unsigned long flags;
@@ -809,17 +840,17 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 		return;
 
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
-	head = queue->tx_head;
-	tail = queue->tx_tail;
-	ring_size = bp->tx_ring_size;
+	head = txq->head;
+	tail = txq->tail;
+	ring_size = bp->ctx->tx_ring_size;
 	count = CIRC_CNT(head, tail, ring_size);
 
 	if (!(tail % ring_size))
 		goto unlock;
 
 	if (!count) {
-		queue->tx_head = 0;
-		queue->tx_tail = 0;
+		txq->head = 0;
+		txq->tail = 0;
 		goto unlock;
 	}
 
@@ -863,8 +894,8 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 		       sizeof(struct macb_tx_skb));
 	}
 
-	queue->tx_head = count;
-	queue->tx_tail = 0;
+	txq->head = count;
+	txq->tail = 0;
 
 	/* Make descriptor updates visible to hardware */
 	wmb();
@@ -1257,6 +1288,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	struct macb_queue *queue = container_of(work, struct macb_queue,
 						tx_error_task);
 	unsigned int q = queue - queue->bp->queues;
+	struct macb_txq *txq = macb_txq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_tx_skb *tx_skb;
 	struct macb_dma_desc *desc;
@@ -1268,7 +1300,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	u32 bytes = 0;
 
 	netdev_vdbg(bp->netdev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
-		    q, queue->tx_tail, queue->tx_head);
+		    q, txq->tail, txq->head);
 
 	/* Prevent the queue NAPI TX poll from running, as it calls
 	 * macb_tx_complete(), which in turn may call netif_wake_subqueue().
@@ -1295,7 +1327,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	/* Treat frames in TX queue including the ones that caused the error.
 	 * Free transmit buffers in upper layer.
 	 */
-	for (tail = queue->tx_tail; tail != queue->tx_head; tail++) {
+	for (tail = txq->tail; tail != txq->head; tail++) {
 		u32	ctrl;
 
 		desc = macb_tx_desc(queue, tail);
@@ -1353,10 +1385,10 @@ static void macb_tx_error_task(struct work_struct *work)
 	wmb();
 
 	/* Reinitialize the TX desc queue */
-	queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
+	queue_writel(queue, TBQP, lower_32_bits(txq->ring_dma));
 	/* Make TX ring reflect state of hardware */
-	queue->tx_head = 0;
-	queue->tx_tail = 0;
+	txq->head = 0;
+	txq->tail = 0;
 
 	/* Housework before enabling TX IRQ */
 	macb_writel(bp, TSR, macb_readl(bp, TSR));
@@ -1406,6 +1438,7 @@ static bool ptp_one_step_sync(struct sk_buff *skb)
 static int macb_tx_complete(struct macb_queue *queue, int budget)
 {
 	struct macb *bp = queue->bp;
+	struct macb_txq *txq = macb_txq(queue);
 	unsigned int q = queue - bp->queues;
 	unsigned long flags;
 	unsigned int tail;
@@ -1414,8 +1447,8 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 	u32 bytes = 0;
 
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
-	head = queue->tx_head;
-	for (tail = queue->tx_tail; tail != head && packets < budget; tail++) {
+	head = txq->head;
+	for (tail = txq->tail; tail != head && packets < budget; tail++) {
 		struct macb_tx_skb	*tx_skb;
 		struct sk_buff		*skb;
 		struct macb_dma_desc	*desc;
@@ -1471,10 +1504,10 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 	netdev_tx_completed_queue(netdev_get_tx_queue(bp->netdev, q),
 				  packets, bytes);
 
-	queue->tx_tail = tail;
+	txq->tail = tail;
 	if (__netif_subqueue_stopped(bp->netdev, q) &&
-	    CIRC_CNT(queue->tx_head, queue->tx_tail,
-		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
+	    CIRC_CNT(txq->head, txq->tail,
+		     bp->ctx->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
 		netif_wake_subqueue(bp->netdev, q);
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
 
@@ -1486,24 +1519,26 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 
 static void gem_rx_refill(struct macb_queue *queue)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
 	struct sk_buff *skb;
 	unsigned int entry;
 	dma_addr_t paddr;
 
-	while (CIRC_SPACE(queue->rx_prepared_head, queue->rx_tail,
-			bp->rx_ring_size) > 0) {
-		entry = macb_rx_ring_wrap(bp, queue->rx_prepared_head);
+	while (CIRC_SPACE(rxq->prepared_head, rxq->tail,
+			  bp->ctx->rx_ring_size) > 0) {
+		entry = macb_rx_ring_wrap(bp, rxq->prepared_head);
 
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
 		desc = macb_rx_desc(queue, entry);
 
-		if (!queue->rx_skbuff[entry]) {
+		if (!rxq->skbuff[entry]) {
 			/* allocate sk_buff for this free entry in ring */
-			skb = netdev_alloc_skb(bp->netdev, bp->rx_buffer_size);
+			skb = netdev_alloc_skb(bp->netdev,
+					       bp->ctx->rx_buffer_size);
 			if (unlikely(!skb)) {
 				netdev_err(bp->netdev,
 					   "Unable to allocate sk_buff\n");
@@ -1512,16 +1547,16 @@ static void gem_rx_refill(struct macb_queue *queue)
 
 			/* now fill corresponding descriptor entry */
 			paddr = dma_map_single(&bp->pdev->dev, skb->data,
-					       bp->rx_buffer_size,
+					       bp->ctx->rx_buffer_size,
 					       DMA_FROM_DEVICE);
 			if (dma_mapping_error(&bp->pdev->dev, paddr)) {
 				dev_kfree_skb(skb);
 				break;
 			}
 
-			queue->rx_skbuff[entry] = skb;
+			rxq->skbuff[entry] = skb;
 
-			if (entry == bp->rx_ring_size - 1)
+			if (entry == bp->ctx->rx_ring_size - 1)
 				paddr |= MACB_BIT(RX_WRAP);
 			desc->ctrl = 0;
 			/* Setting addr clears RX_USED and allows reception,
@@ -1548,14 +1583,14 @@ static void gem_rx_refill(struct macb_queue *queue)
 			dma_wmb();
 			desc->addr &= ~MACB_BIT(RX_USED);
 		}
-		queue->rx_prepared_head++;
+		rxq->prepared_head++;
 	}
 
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
 	netdev_vdbg(bp->netdev, "rx ring: queue: %p, prepared head %d, tail %d\n",
-		    queue, queue->rx_prepared_head, queue->rx_tail);
+		    queue, rxq->prepared_head, rxq->tail);
 }
 
 /* Mark DMA descriptors from begin up to and not including end as unused */
@@ -1582,6 +1617,7 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin,
 static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		  int budget)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
 	struct sk_buff *skb;
@@ -1594,7 +1630,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		dma_addr_t addr;
 		bool rxused;
 
-		entry = macb_rx_ring_wrap(bp, queue->rx_tail);
+		entry = macb_rx_ring_wrap(bp, rxq->tail);
 		desc = macb_rx_desc(queue, entry);
 
 		/* Make hw descriptor updates visible to CPU */
@@ -1611,7 +1647,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 
 		ctrl = desc->ctrl;
 
-		queue->rx_tail++;
+		rxq->tail++;
 		count++;
 
 		if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) {
@@ -1621,7 +1657,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 			queue->stats.rx_dropped++;
 			break;
 		}
-		skb = queue->rx_skbuff[entry];
+		skb = rxq->skbuff[entry];
 		if (unlikely(!skb)) {
 			netdev_err(bp->netdev,
 				   "inconsistent Rx descriptor chain\n");
@@ -1630,14 +1666,14 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 			break;
 		}
 		/* now everything is ready for receiving packet */
-		queue->rx_skbuff[entry] = NULL;
+		rxq->skbuff[entry] = NULL;
 		len = ctrl & bp->rx_frm_len_mask;
 
 		netdev_vdbg(bp->netdev, "gem_rx %u (len %u)\n", entry, len);
 
 		skb_put(skb, len);
 		dma_unmap_single(&bp->pdev->dev, addr,
-				 bp->rx_buffer_size, DMA_FROM_DEVICE);
+				 bp->ctx->rx_buffer_size, DMA_FROM_DEVICE);
 
 		skb->protocol = eth_type_trans(skb, bp->netdev);
 		skb_checksum_none_assert(skb);
@@ -1717,7 +1753,7 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	skb_put(skb, len);
 
 	for (frag = first_frag; ; frag++) {
-		unsigned int frag_len = bp->rx_buffer_size;
+		unsigned int frag_len = bp->ctx->rx_buffer_size;
 
 		if (offset + frag_len > len) {
 			if (unlikely(frag != last_frag)) {
@@ -1729,7 +1765,7 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 		skb_copy_to_linear_data_offset(skb, offset,
 					       macb_rx_buffer(queue, frag),
 					       frag_len);
-		offset += bp->rx_buffer_size;
+		offset += bp->ctx->rx_buffer_size;
 		desc = macb_rx_desc(queue, frag);
 		desc->addr &= ~MACB_BIT(RX_USED);
 
@@ -1754,32 +1790,34 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 
 static inline void macb_init_rx_ring(struct macb_queue *queue)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb_dma_desc *desc = NULL;
 	struct macb *bp = queue->bp;
 	dma_addr_t addr;
 	int i;
 
-	addr = queue->rx_buffers_dma;
-	for (i = 0; i < bp->rx_ring_size; i++) {
+	addr = rxq->buffers_dma;
+	for (i = 0; i < bp->ctx->rx_ring_size; i++) {
 		desc = macb_rx_desc(queue, i);
 		macb_set_addr(bp, desc, addr);
 		desc->ctrl = 0;
-		addr += bp->rx_buffer_size;
+		addr += bp->ctx->rx_buffer_size;
 	}
 	desc->addr |= MACB_BIT(RX_WRAP);
-	queue->rx_tail = 0;
+	rxq->tail = 0;
 }
 
 static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 		   int budget)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	bool reset_rx_queue = false;
 	int first_frag = -1;
 	unsigned int tail;
 	int received = 0;
 
-	for (tail = queue->rx_tail; budget > 0; tail++) {
+	for (tail = rxq->tail; budget > 0; tail++) {
 		struct macb_dma_desc *desc = macb_rx_desc(queue, tail);
 		u32 ctrl;
 
@@ -1833,7 +1871,7 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 		macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE));
 
 		macb_init_rx_ring(queue);
-		queue_writel(queue, RBQP, queue->rx_ring_dma);
+		queue_writel(queue, RBQP, rxq->ring_dma);
 
 		macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
 
@@ -1842,20 +1880,21 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 	}
 
 	if (first_frag != -1)
-		queue->rx_tail = first_frag;
+		rxq->tail = first_frag;
 	else
-		queue->rx_tail = tail;
+		rxq->tail = tail;
 
 	return received;
 }
 
 static bool macb_rx_pending(struct macb_queue *queue)
 {
+	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
 	unsigned int entry;
 
-	entry = macb_rx_ring_wrap(bp, queue->rx_tail);
+	entry = macb_rx_ring_wrap(bp, rxq->tail);
 	desc = macb_rx_desc(queue, entry);
 
 	/* Make hw descriptor updates visible to CPU */
@@ -1903,18 +1942,19 @@ static int macb_rx_poll(struct napi_struct *napi, int budget)
 
 static void macb_tx_restart(struct macb_queue *queue)
 {
+	struct macb_txq *txq = macb_txq(queue);
 	struct macb *bp = queue->bp;
 	unsigned int head_idx, tbqp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
 
-	if (queue->tx_head == queue->tx_tail)
+	if (txq->head == txq->tail)
 		goto out_tx_ptr_unlock;
 
 	tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
 	tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
-	head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, queue->tx_head));
+	head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, txq->head));
 
 	if (tbqp == head_idx)
 		goto out_tx_ptr_unlock;
@@ -1929,15 +1969,16 @@ static void macb_tx_restart(struct macb_queue *queue)
 
 static bool macb_tx_complete_pending(struct macb_queue *queue)
 {
+	struct macb_txq *txq = macb_txq(queue);
 	bool retval = false;
 	unsigned long flags;
 
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
-	if (queue->tx_head != queue->tx_tail) {
+	if (txq->head != txq->tail) {
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
-		if (macb_tx_desc(queue, queue->tx_tail)->ctrl & MACB_BIT(TX_USED))
+		if (macb_tx_desc(queue, txq->tail)->ctrl & MACB_BIT(TX_USED))
 			retval = true;
 	}
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
@@ -2199,8 +2240,9 @@ static unsigned int macb_tx_map(struct macb *bp,
 				struct sk_buff *skb,
 				unsigned int hdrlen)
 {
+	struct macb_txq *txq = macb_txq(queue);
 	unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags;
-	unsigned int len, i, tx_head = queue->tx_head;
+	unsigned int len, i, tx_head = txq->head;
 	u32 ctrl, lso_ctrl = 0, seq_ctrl = 0;
 	unsigned int eof = 1, mss_mfs = 0;
 	struct macb_tx_skb *tx_skb = NULL;
@@ -2320,11 +2362,12 @@ static unsigned int macb_tx_map(struct macb *bp,
 			ctrl |= MACB_BIT(TX_LAST);
 			eof = 0;
 		}
-		if (unlikely(macb_tx_ring_wrap(bp, i) == bp->tx_ring_size - 1))
+		if (unlikely(macb_tx_ring_wrap(bp, i) ==
+				bp->ctx->tx_ring_size - 1))
 			ctrl |= MACB_BIT(TX_WRAP);
 
 		/* First descriptor is header descriptor */
-		if (i == queue->tx_head) {
+		if (i == txq->head) {
 			ctrl |= MACB_BF(TX_LSO, lso_ctrl);
 			ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
 			if ((bp->netdev->features & NETIF_F_HW_CSUM) &&
@@ -2344,16 +2387,16 @@ static unsigned int macb_tx_map(struct macb *bp,
 		 */
 		wmb();
 		desc->ctrl = ctrl;
-	} while (i != queue->tx_head);
+	} while (i != txq->head);
 
-	queue->tx_head = tx_head;
+	txq->head = tx_head;
 
 	return 0;
 
 dma_error:
 	netdev_err(bp->netdev, "TX DMA map failed\n");
 
-	for (i = queue->tx_head; i != tx_head; i++) {
+	for (i = txq->head; i != tx_head; i++) {
 		tx_skb = macb_tx_skb(queue, i);
 
 		macb_tx_unmap(bp, tx_skb, 0);
@@ -2473,6 +2516,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	unsigned int q = skb_get_queue_mapping(skb);
 	unsigned int desc_cnt, nr_frags, frag_size, f;
 	struct macb_queue *queue = &bp->queues[q];
+	struct macb_txq *txq = macb_txq(queue);
 	netdev_tx_t ret = NETDEV_TX_OK;
 	unsigned int hdrlen;
 	unsigned long flags;
@@ -2536,11 +2580,11 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
 
 	/* This is a hard error, log it. */
-	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
-		       bp->tx_ring_size) < desc_cnt) {
+	if (CIRC_SPACE(txq->head, txq->tail,
+		       bp->ctx->tx_ring_size) < desc_cnt) {
 		netif_stop_subqueue(netdev, q);
 		netdev_dbg(netdev, "tx_head = %u, tx_tail = %u\n",
-			   queue->tx_head, queue->tx_tail);
+			   txq->head, txq->tail);
 		ret = NETDEV_TX_BUSY;
 		goto unlock;
 	}
@@ -2562,7 +2606,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 	spin_unlock(&bp->lock);
 
-	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
+	if (CIRC_SPACE(txq->head, txq->tail, bp->ctx->tx_ring_size) < 1)
 		netif_stop_subqueue(netdev, q);
 
 unlock:
@@ -2574,38 +2618,42 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 {
 	if (!macb_is_gem(bp)) {
-		bp->rx_buffer_size = MACB_RX_BUFFER_SIZE;
+		bp->ctx->rx_buffer_size = MACB_RX_BUFFER_SIZE;
 	} else {
-		bp->rx_buffer_size = MIN(size, RX_BUFFER_MAX);
+		bp->ctx->rx_buffer_size = MIN(size, RX_BUFFER_MAX);
 
-		if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) {
+		if (bp->ctx->rx_buffer_size % RX_BUFFER_MULTIPLE) {
 			netdev_dbg(bp->netdev,
 				   "RX buffer must be multiple of %d bytes, expanding\n",
 				   RX_BUFFER_MULTIPLE);
-			bp->rx_buffer_size =
-				roundup(bp->rx_buffer_size, RX_BUFFER_MULTIPLE);
+			bp->ctx->rx_buffer_size =
+				roundup(bp->ctx->rx_buffer_size,
+					RX_BUFFER_MULTIPLE);
 		}
 	}
 
-	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%zu]\n",
-		   bp->netdev->mtu, bp->rx_buffer_size);
+	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%u]\n",
+		   bp->netdev->mtu, bp->ctx->rx_buffer_size);
 }
 
 static void gem_free_rx_buffers(struct macb *bp)
 {
-	struct sk_buff		*skb;
-	struct macb_dma_desc	*desc;
+	struct macb_dma_desc *desc;
 	struct macb_queue *queue;
-	dma_addr_t		addr;
+	struct macb_rxq *rxq;
+	struct sk_buff *skb;
+	dma_addr_t addr;
 	unsigned int q;
 	int i;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		if (!queue->rx_skbuff)
+		rxq = &bp->ctx->rxq[q];
+
+		if (!rxq->skbuff)
 			continue;
 
-		for (i = 0; i < bp->rx_ring_size; i++) {
-			skb = queue->rx_skbuff[i];
+		for (i = 0; i < bp->ctx->rx_ring_size; i++) {
+			skb = rxq->skbuff[i];
 
 			if (!skb)
 				continue;
@@ -2613,95 +2661,106 @@ static void gem_free_rx_buffers(struct macb *bp)
 			desc = macb_rx_desc(queue, i);
 			addr = macb_get_addr(bp, desc);
 
-			dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size,
-					DMA_FROM_DEVICE);
+			dma_unmap_single(&bp->pdev->dev, addr,
+					 bp->ctx->rx_buffer_size,
+					 DMA_FROM_DEVICE);
 			dev_kfree_skb_any(skb);
 			skb = NULL;
 		}
 
-		kfree(queue->rx_skbuff);
-		queue->rx_skbuff = NULL;
+		kfree(rxq->skbuff);
+		rxq->skbuff = NULL;
 	}
 }
 
 static void macb_free_rx_buffers(struct macb *bp)
 {
-	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 
-	if (queue->rx_buffers) {
+	if (rxq->buffers) {
 		dma_free_coherent(&bp->pdev->dev,
-				  bp->rx_ring_size * bp->rx_buffer_size,
-				  queue->rx_buffers, queue->rx_buffers_dma);
-		queue->rx_buffers = NULL;
+				  bp->ctx->rx_ring_size *
+					bp->ctx->rx_buffer_size,
+				  rxq->buffers, rxq->buffers_dma);
+		rxq->buffers = NULL;
 	}
 }
 
 static unsigned int macb_tx_ring_size_per_queue(struct macb *bp)
 {
-	return macb_dma_desc_get_size(bp) * bp->tx_ring_size + bp->tx_bd_rd_prefetch;
+	return macb_dma_desc_get_size(bp) * bp->ctx->tx_ring_size +
+		bp->tx_bd_rd_prefetch;
 }
 
 static unsigned int macb_rx_ring_size_per_queue(struct macb *bp)
 {
-	return macb_dma_desc_get_size(bp) * bp->rx_ring_size + bp->rx_bd_rd_prefetch;
+	return macb_dma_desc_get_size(bp) * bp->ctx->rx_ring_size +
+		bp->rx_bd_rd_prefetch;
 }
 
 static void macb_free_consistent(struct macb *bp)
 {
 	struct device *dev = &bp->pdev->dev;
-	struct macb_queue *queue;
+	struct macb_txq *txq;
+	struct macb_rxq *rxq;
 	unsigned int q;
 	size_t size;
 
 	bp->macbgem_ops.mog_free_rx_buffers(bp);
 
+	txq = &bp->ctx->txq[0];
 	size = bp->num_queues * macb_tx_ring_size_per_queue(bp);
-	dma_free_coherent(dev, size, bp->queues[0].tx_ring, bp->queues[0].tx_ring_dma);
+	dma_free_coherent(dev, size, txq->ring, txq->ring_dma);
 
+	rxq = &bp->ctx->rxq[0];
 	size = bp->num_queues * macb_rx_ring_size_per_queue(bp);
-	dma_free_coherent(dev, size, bp->queues[0].rx_ring, bp->queues[0].rx_ring_dma);
+	dma_free_coherent(dev, size, rxq->ring, rxq->ring_dma);
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		kfree(queue->tx_skb);
-		queue->tx_skb = NULL;
-		queue->tx_ring = NULL;
-		queue->rx_ring = NULL;
+	for (q = 0; q < bp->num_queues; ++q) {
+		txq = &bp->ctx->txq[q];
+		rxq = &bp->ctx->rxq[q];
+
+		kfree(txq->skb);
+		txq->skb = NULL;
+		txq->ring = NULL;
+		rxq->ring = NULL;
 	}
 }
 
 static int gem_alloc_rx_buffers(struct macb *bp)
 {
-	struct macb_queue *queue;
+	struct macb_rxq *rxq;
 	unsigned int q;
 	int size;
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		size = bp->rx_ring_size * sizeof(struct sk_buff *);
-		queue->rx_skbuff = kzalloc(size, GFP_KERNEL);
-		if (!queue->rx_skbuff)
+	for (q = 0; q < bp->num_queues; ++q) {
+		rxq = &bp->ctx->rxq[q];
+		size = bp->ctx->rx_ring_size * sizeof(struct sk_buff *);
+		rxq->skbuff = kzalloc(size, GFP_KERNEL);
+		if (!rxq->skbuff)
 			return -ENOMEM;
 		else
 			netdev_dbg(bp->netdev,
 				   "Allocated %d RX struct sk_buff entries at %p\n",
-				   bp->rx_ring_size, queue->rx_skbuff);
+				   bp->ctx->rx_ring_size, rxq->skbuff);
 	}
 	return 0;
 }
 
 static int macb_alloc_rx_buffers(struct macb *bp)
 {
-	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 	int size;
 
-	size = bp->rx_ring_size * bp->rx_buffer_size;
-	queue->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size,
-					    &queue->rx_buffers_dma, GFP_KERNEL);
-	if (!queue->rx_buffers)
+	size = bp->ctx->rx_ring_size * bp->ctx->rx_buffer_size;
+	rxq->buffers = dma_alloc_coherent(&bp->pdev->dev, size,
+					  &rxq->buffers_dma, GFP_KERNEL);
+	if (!rxq->buffers)
 		return -ENOMEM;
 
 	netdev_dbg(bp->netdev,
 		   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
-		   size, (unsigned long)queue->rx_buffers_dma, queue->rx_buffers);
+		   size, (unsigned long)rxq->buffers_dma, rxq->buffers);
 	return 0;
 }
 
@@ -2709,7 +2768,8 @@ static int macb_alloc_consistent(struct macb *bp)
 {
 	struct device *dev = &bp->pdev->dev;
 	dma_addr_t tx_dma, rx_dma;
-	struct macb_queue *queue;
+	struct macb_txq *txq;
+	struct macb_rxq *rxq;
 	unsigned int q;
 	void *tx, *rx;
 	size_t size;
@@ -2735,16 +2795,19 @@ static int macb_alloc_consistent(struct macb *bp)
 	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
 		   size, bp->num_queues, (unsigned long)rx_dma, rx);
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue->tx_ring = tx + macb_tx_ring_size_per_queue(bp) * q;
-		queue->tx_ring_dma = tx_dma + macb_tx_ring_size_per_queue(bp) * q;
+	for (q = 0; q < bp->num_queues; ++q) {
+		txq = &bp->ctx->txq[q];
+		rxq = &bp->ctx->rxq[q];
 
-		queue->rx_ring = rx + macb_rx_ring_size_per_queue(bp) * q;
-		queue->rx_ring_dma = rx_dma + macb_rx_ring_size_per_queue(bp) * q;
+		txq->ring = tx + macb_tx_ring_size_per_queue(bp) * q;
+		txq->ring_dma = tx_dma + macb_tx_ring_size_per_queue(bp) * q;
 
-		size = bp->tx_ring_size * sizeof(struct macb_tx_skb);
-		queue->tx_skb = kmalloc(size, GFP_KERNEL);
-		if (!queue->tx_skb)
+		rxq->ring = rx + macb_rx_ring_size_per_queue(bp) * q;
+		rxq->ring_dma = rx_dma + macb_rx_ring_size_per_queue(bp) * q;
+
+		size = bp->ctx->tx_ring_size * sizeof(struct macb_tx_skb);
+		txq->skb = kmalloc(size, GFP_KERNEL);
+		if (!txq->skb)
 			goto out_err;
 	}
 	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
@@ -2759,8 +2822,10 @@ static int macb_alloc_consistent(struct macb *bp)
 
 static void gem_init_rx_ring(struct macb_queue *queue)
 {
-	queue->rx_tail = 0;
-	queue->rx_prepared_head = 0;
+	struct macb_rxq *rxq = macb_rxq(queue);
+
+	rxq->tail = 0;
+	rxq->prepared_head = 0;
 
 	gem_rx_refill(queue);
 }
@@ -2769,18 +2834,20 @@ static void gem_init_rings(struct macb *bp)
 {
 	struct macb_queue *queue;
 	struct macb_dma_desc *desc = NULL;
+	struct macb_txq *txq;
 	unsigned int q;
 	int i;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		for (i = 0; i < bp->tx_ring_size; i++) {
+		txq = &bp->ctx->txq[q];
+		for (i = 0; i < bp->ctx->tx_ring_size; i++) {
 			desc = macb_tx_desc(queue, i);
 			macb_set_addr(bp, desc, 0);
 			desc->ctrl = MACB_BIT(TX_USED);
 		}
 		desc->ctrl |= MACB_BIT(TX_WRAP);
-		queue->tx_head = 0;
-		queue->tx_tail = 0;
+		txq->head = 0;
+		txq->tail = 0;
 
 		gem_init_rx_ring(queue);
 	}
@@ -2788,18 +2855,19 @@ static void gem_init_rings(struct macb *bp)
 
 static void macb_init_rings(struct macb *bp)
 {
-	int i;
+	struct macb_txq *txq = &bp->ctx->txq[0];
 	struct macb_dma_desc *desc = NULL;
+	int i;
 
 	macb_init_rx_ring(&bp->queues[0]);
 
-	for (i = 0; i < bp->tx_ring_size; i++) {
+	for (i = 0; i < bp->ctx->tx_ring_size; i++) {
 		desc = macb_tx_desc(&bp->queues[0], i);
 		macb_set_addr(bp, desc, 0);
 		desc->ctrl = MACB_BIT(TX_USED);
 	}
-	bp->queues[0].tx_head = 0;
-	bp->queues[0].tx_tail = 0;
+	txq->head = 0;
+	txq->tail = 0;
 	desc->ctrl |= MACB_BIT(TX_WRAP);
 }
 
@@ -2914,7 +2982,7 @@ static void macb_configure_dma(struct macb *bp)
 	unsigned int q;
 	u32 dmacfg;
 
-	buffer_size = bp->rx_buffer_size / RX_BUFFER_MULTIPLE;
+	buffer_size = bp->ctx->rx_buffer_size / RX_BUFFER_MULTIPLE;
 	if (macb_is_gem(bp)) {
 		dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L);
 		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
@@ -3121,14 +3189,22 @@ static int macb_open(struct net_device *netdev)
 	if (err < 0)
 		return err;
 
+	bp->ctx = kzalloc_obj(*bp->ctx);
+	if (!bp->ctx) {
+		err = -ENOMEM;
+		goto pm_exit;
+	}
+
 	/* RX buffers initialization */
 	macb_init_rx_buffer_size(bp, bufsz);
+	bp->ctx->rx_ring_size = bp->configured_rx_ring_size;
+	bp->ctx->tx_ring_size = bp->configured_tx_ring_size;
 
 	err = macb_alloc_consistent(bp);
 	if (err) {
 		netdev_err(netdev, "Unable to allocate DMA memory (error %d)\n",
 			   err);
-		goto pm_exit;
+		goto free_ctx;
 	}
 
 	bp->macbgem_ops.mog_init_rings(bp);
@@ -3170,6 +3246,9 @@ static int macb_open(struct net_device *netdev)
 		napi_disable(&queue->napi_tx);
 	}
 	macb_free_consistent(bp);
+free_ctx:
+	kfree(bp->ctx);
+	bp->ctx = NULL;
 pm_exit:
 	pm_runtime_put_sync(&bp->pdev->dev);
 	return err;
@@ -3203,6 +3282,8 @@ static int macb_close(struct net_device *netdev)
 	spin_unlock_irqrestore(&bp->lock, flags);
 
 	macb_free_consistent(bp);
+	kfree(bp->ctx);
+	bp->ctx = NULL;
 
 	if (bp->ptp_info)
 		bp->ptp_info->ptp_remove(netdev);
@@ -3568,15 +3649,22 @@ static int macb_get_regs_len(struct net_device *netdev)
 static void macb_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 			  void *p)
 {
+	dma_addr_t tx_dma_tail = 0, tx_dma_head = 0;
 	struct macb *bp = netdev_priv(netdev);
-	unsigned int tail, head;
+	unsigned int tail = 0, head = 0;
+	struct macb_txq *txq;
 	u32 *regs_buff = p;
 
 	regs->version = (macb_readl(bp, MID) & ((1 << MACB_REV_SIZE) - 1))
 			| MACB_GREGS_VERSION;
 
-	tail = macb_tx_ring_wrap(bp, bp->queues[0].tx_tail);
-	head = macb_tx_ring_wrap(bp, bp->queues[0].tx_head);
+	if (bp->ctx) {
+		txq = &bp->ctx->txq[0];
+		tail = macb_tx_ring_wrap(bp, txq->tail);
+		head = macb_tx_ring_wrap(bp, txq->head);
+		tx_dma_tail = macb_tx_dma(&bp->queues[0], tail);
+		tx_dma_head = macb_tx_dma(&bp->queues[0], head);
+	}
 
 	regs_buff[0]  = macb_readl(bp, NCR);
 	regs_buff[1]  = macb_or_gem_readl(bp, NCFGR);
@@ -3589,8 +3677,8 @@ static void macb_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 
 	regs_buff[8]  = tail;
 	regs_buff[9]  = head;
-	regs_buff[10] = macb_tx_dma(&bp->queues[0], tail);
-	regs_buff[11] = macb_tx_dma(&bp->queues[0], head);
+	regs_buff[10] = tx_dma_tail;
+	regs_buff[11] = tx_dma_head;
 
 	if (!(bp->caps & MACB_CAPS_USRIO_DISABLED))
 		regs_buff[12] = macb_or_gem_readl(bp, USRIO);
@@ -3655,8 +3743,8 @@ static void macb_get_ringparam(struct net_device *netdev,
 	ring->rx_max_pending = MAX_RX_RING_SIZE;
 	ring->tx_max_pending = MAX_TX_RING_SIZE;
 
-	ring->rx_pending = bp->rx_ring_size;
-	ring->tx_pending = bp->tx_ring_size;
+	ring->rx_pending = bp->configured_rx_ring_size;
+	ring->tx_pending = bp->configured_tx_ring_size;
 }
 
 static int macb_set_ringparam(struct net_device *netdev,
@@ -3679,8 +3767,8 @@ static int macb_set_ringparam(struct net_device *netdev,
 			      MIN_TX_RING_SIZE, MAX_TX_RING_SIZE);
 	new_tx_size = roundup_pow_of_two(new_tx_size);
 
-	if ((new_tx_size == bp->tx_ring_size) &&
-	    (new_rx_size == bp->rx_ring_size)) {
+	if (new_tx_size == bp->configured_tx_ring_size &&
+	    new_rx_size == bp->configured_rx_ring_size) {
 		/* nothing to do */
 		return 0;
 	}
@@ -3690,8 +3778,8 @@ static int macb_set_ringparam(struct net_device *netdev,
 		macb_close(bp->netdev);
 	}
 
-	bp->rx_ring_size = new_rx_size;
-	bp->tx_ring_size = new_tx_size;
+	bp->configured_rx_ring_size = new_rx_size;
+	bp->configured_tx_ring_size = new_tx_size;
 
 	if (reset)
 		macb_open(bp->netdev);
@@ -4698,9 +4786,6 @@ static int macb_init_dflt(struct platform_device *pdev)
 	int err;
 	u32 val, reg;
 
-	bp->tx_ring_size = DEFAULT_TX_RING_SIZE;
-	bp->rx_ring_size = DEFAULT_RX_RING_SIZE;
-
 	/* set the queue register mapping once for all: queue0 has a special
 	 * register mapping but we don't want to test the queue index then
 	 * compute the corresponding register offset at run time.
@@ -4906,26 +4991,26 @@ static struct sifive_fu540_macb_mgmt *mgmt;
 
 static int at91ether_alloc_coherent(struct macb *bp)
 {
-	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 
-	queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev,
-					    (AT91ETHER_MAX_RX_DESCR *
-					     macb_dma_desc_get_size(bp)),
-					    &queue->rx_ring_dma, GFP_KERNEL);
-	if (!queue->rx_ring)
+	rxq->ring = dma_alloc_coherent(&bp->pdev->dev,
+				       (AT91ETHER_MAX_RX_DESCR *
+					macb_dma_desc_get_size(bp)),
+				       &rxq->ring_dma, GFP_KERNEL);
+	if (!rxq->ring)
 		return -ENOMEM;
 
-	queue->rx_buffers = dma_alloc_coherent(&bp->pdev->dev,
-					       AT91ETHER_MAX_RX_DESCR *
-					       AT91ETHER_MAX_RBUFF_SZ,
-					       &queue->rx_buffers_dma,
-					       GFP_KERNEL);
-	if (!queue->rx_buffers) {
+	rxq->buffers = dma_alloc_coherent(&bp->pdev->dev,
+					  AT91ETHER_MAX_RX_DESCR *
+					  AT91ETHER_MAX_RBUFF_SZ,
+					  &rxq->buffers_dma,
+					  GFP_KERNEL);
+	if (!rxq->buffers) {
 		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
 				  macb_dma_desc_get_size(bp),
-				  queue->rx_ring, queue->rx_ring_dma);
-		queue->rx_ring = NULL;
+				  rxq->ring, rxq->ring_dma);
+		rxq->ring = NULL;
 		return -ENOMEM;
 	}
 
@@ -4934,22 +5019,22 @@ static int at91ether_alloc_coherent(struct macb *bp)
 
 static void at91ether_free_coherent(struct macb *bp)
 {
-	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 
-	if (queue->rx_ring) {
+	if (rxq->ring) {
 		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
 				  macb_dma_desc_get_size(bp),
-				  queue->rx_ring, queue->rx_ring_dma);
-		queue->rx_ring = NULL;
+				  rxq->ring, rxq->ring_dma);
+		rxq->ring = NULL;
 	}
 
-	if (queue->rx_buffers) {
+	if (rxq->buffers) {
 		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
 				  AT91ETHER_MAX_RBUFF_SZ,
-				  queue->rx_buffers, queue->rx_buffers_dma);
-		queue->rx_buffers = NULL;
+				  rxq->buffers, rxq->buffers_dma);
+		rxq->buffers = NULL;
 	}
 }
 
@@ -4957,6 +5042,7 @@ static void at91ether_free_coherent(struct macb *bp)
 static int at91ether_start(struct macb *bp)
 {
 	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 	struct macb_dma_desc *desc;
 	dma_addr_t addr;
 	u32 ctl;
@@ -4966,7 +5052,7 @@ static int at91ether_start(struct macb *bp)
 	if (ret)
 		return ret;
 
-	addr = queue->rx_buffers_dma;
+	addr = rxq->buffers_dma;
 	for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) {
 		desc = macb_rx_desc(queue, i);
 		macb_set_addr(bp, desc, addr);
@@ -4978,10 +5064,10 @@ static int at91ether_start(struct macb *bp)
 	desc->addr |= MACB_BIT(RX_WRAP);
 
 	/* Reset buffer index */
-	queue->rx_tail = 0;
+	rxq->tail = 0;
 
 	/* Program address of descriptor list in Rx Buffer Queue register */
-	macb_writel(bp, RBQP, queue->rx_ring_dma);
+	macb_writel(bp, RBQP, rxq->ring_dma);
 
 	/* Enable Receive and Transmit */
 	ctl = macb_readl(bp, NCR);
@@ -5119,15 +5205,15 @@ static void at91ether_rx(struct net_device *netdev)
 {
 	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue = &bp->queues[0];
+	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
 	struct sk_buff *skb;
 	unsigned int pktlen;
 
-	desc = macb_rx_desc(queue, queue->rx_tail);
+	desc = macb_rx_desc(queue, rxq->tail);
 	while (desc->addr & MACB_BIT(RX_USED)) {
-		p_recv = queue->rx_buffers +
-			 queue->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
+		p_recv = rxq->buffers + rxq->tail * AT91ETHER_MAX_RBUFF_SZ;
 		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
 		skb = netdev_alloc_skb(netdev, pktlen + 2);
 		if (skb) {
@@ -5149,12 +5235,12 @@ static void at91ether_rx(struct net_device *netdev)
 		desc->addr &= ~MACB_BIT(RX_USED);
 
 		/* wrap after last buffer */
-		if (queue->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
-			queue->rx_tail = 0;
+		if (rxq->tail == AT91ETHER_MAX_RX_DESCR - 1)
+			rxq->tail = 0;
 		else
-			queue->rx_tail++;
+			rxq->tail++;
 
-		desc = macb_rx_desc(queue, queue->rx_tail);
+		desc = macb_rx_desc(queue, rxq->tail);
 	}
 }
 
@@ -5807,6 +5893,8 @@ static int macb_probe(struct platform_device *pdev)
 	bp->rx_clk = rx_clk;
 	bp->tsu_clk = tsu_clk;
 	bp->jumbo_max_len = macb_config->jumbo_max_len;
+	bp->configured_rx_ring_size = DEFAULT_RX_RING_SIZE;
+	bp->configured_tx_ring_size = DEFAULT_TX_RING_SIZE;
 
 	if (!hw_is_gem(bp->regs, bp->native_io))
 		bp->max_tx_length = MACB_MAX_TX_LEN;

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 07/14] net: macb: avoid macb_init_rx_buffer_size() modifying state
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

macb_init_rx_buffer_size() takes the macb private data struct and
overwrites its bp->ctx->rx_buffer_size. To make it usable with multiple
contexts, make it return the computed size and let the caller store it.

Also, move the `bufsz` computation into it. The value is only used on
GEM, and for historical reasons it currently lives in macb_open().

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 3e596cbe9fc8..2eddc7892073 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2615,25 +2615,26 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 	return ret;
 }
 
-static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
+static unsigned int macb_rx_buffer_size(struct macb *bp, unsigned int mtu)
 {
-	if (!macb_is_gem(bp)) {
-		bp->ctx->rx_buffer_size = MACB_RX_BUFFER_SIZE;
-	} else {
-		bp->ctx->rx_buffer_size = MIN(size, RX_BUFFER_MAX);
+	unsigned int size;
 
-		if (bp->ctx->rx_buffer_size % RX_BUFFER_MULTIPLE) {
+	if (!macb_is_gem(bp)) {
+		size = MACB_RX_BUFFER_SIZE;
+	} else {
+		size = mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
+		size = MIN(size, RX_BUFFER_MAX);
+
+		if (size % RX_BUFFER_MULTIPLE) {
 			netdev_dbg(bp->netdev,
 				   "RX buffer must be multiple of %d bytes, expanding\n",
 				   RX_BUFFER_MULTIPLE);
-			bp->ctx->rx_buffer_size =
-				roundup(bp->ctx->rx_buffer_size,
-					RX_BUFFER_MULTIPLE);
+			size = roundup(size, RX_BUFFER_MULTIPLE);
 		}
 	}
 
-	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%u]\n",
-		   bp->netdev->mtu, bp->ctx->rx_buffer_size);
+	netdev_dbg(bp->netdev, "mtu [%u] rx_buffer_size [%u]\n", mtu, size);
+	return size;
 }
 
 static void gem_free_rx_buffers(struct macb *bp)
@@ -3177,7 +3178,6 @@ static void macb_set_rx_mode(struct net_device *netdev)
 
 static int macb_open(struct net_device *netdev)
 {
-	size_t bufsz = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
 	struct macb *bp = netdev_priv(netdev);
 	struct macb_queue *queue;
 	unsigned int q;
@@ -3196,7 +3196,7 @@ static int macb_open(struct net_device *netdev)
 	}
 
 	/* RX buffers initialization */
-	macb_init_rx_buffer_size(bp, bufsz);
+	bp->ctx->rx_buffer_size = macb_rx_buffer_size(bp, netdev->mtu);
 	bp->ctx->rx_ring_size = bp->configured_rx_ring_size;
 	bp->ctx->tx_ring_size = bp->configured_tx_ring_size;
 

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 08/14] net: macb: make `struct macb` subset reachable from macb_context struct
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

For parallel MACB contexts to become a reality, many functions need
to stop operating on bp->ctx (the currently active context) and instead
work on a context they get passed. That context might be
(1) the new one that is getting allocated and initialised, or,
(2) the old one to be freed.

To reduce bug surface area, we will restrict those functions to *only*
take a context and no `struct macb *bp`. That way, the bug of using
`bp->ctx` instead of `ctx` can never occur.

For that, we need to embed a subset of `struct macb` information into
each context so that all helpers can still do their jobs. That subset
must be constant once probe is completed. Do this by taking a pointer
to a subset of macb called `struct macb_info`.

That subset is accessible from context (ctx->info->caps) or
from bp (bp->caps) using `-fms-extensions` option, thanks to
commit c4781dc3d1cf ("Kbuild: enable -fms-extensions").
https://gcc.gnu.org/onlinedocs/gcc/Unnamed-Fields.html

Add the structure and assign ctx->info at alloc,
but nothing uses it yet.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      | 30 +++++++++++++++++++++---------
 drivers/net/ethernet/cadence/macb_main.c |  2 ++
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 452b2c8f8641..5ce1b1045e6a 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1290,6 +1290,16 @@ struct ethtool_rx_fs_list {
 	unsigned int count;
 };
 
+struct macb_info {
+	struct platform_device	*pdev;
+	struct net_device	*netdev;
+	struct macb_or_gem_ops	macbgem_ops;
+	unsigned int		num_queues;
+	u32			caps;
+	int			rx_bd_rd_prefetch;
+	int			tx_bd_rd_prefetch;
+};
+
 struct macb_rxq {
 	struct macb_dma_desc	*ring;		/* MACB & GEM */
 	dma_addr_t		ring_dma;	/* MACB & GEM */
@@ -1309,6 +1319,8 @@ struct macb_txq {
 };
 
 struct macb_context {
+	const struct macb_info	*info;
+
 	unsigned int		rx_buffer_size;
 	unsigned int		rx_ring_size;
 	unsigned int		tx_ring_size;
@@ -1324,6 +1336,15 @@ struct macb {
 	u32	(*macb_reg_readl)(struct macb *bp, int offset);
 	void	(*macb_reg_writel)(struct macb *bp, int offset, u32 value);
 
+	/*
+	 * Give direct access (bp->caps) and
+	 * allow taking a pointer to it (&bp->info) for contexts.
+	 */
+	union {
+		struct macb_info;
+		struct macb_info info;
+	};
+
 	/*
 	 * Context stores all its parameters.
 	 * But we must remember them across closure.
@@ -1335,17 +1356,14 @@ struct macb {
 	struct macb_dma_desc	*rx_ring_tieoff;
 	dma_addr_t		rx_ring_tieoff_dma;
 
-	unsigned int		num_queues;
 	struct macb_queue	queues[MACB_MAX_QUEUES];
 
 	spinlock_t		lock;
-	struct platform_device	*pdev;
 	struct clk		*pclk;
 	struct clk		*hclk;
 	struct clk		*tx_clk;
 	struct clk		*rx_clk;
 	struct clk		*tsu_clk;
-	struct net_device	*netdev;
 	/* Protects hw_stats and ethtool_stats */
 	spinlock_t		stats_lock;
 	union {
@@ -1353,15 +1371,12 @@ struct macb {
 		struct gem_stats	gem;
 	}			hw_stats;
 
-	struct macb_or_gem_ops	macbgem_ops;
-
 	struct mii_bus		*mii_bus;
 	struct phylink		*phylink;
 	struct phylink_config	phylink_config;
 	struct phylink_pcs	phylink_usx_pcs;
 	struct phylink_pcs	phylink_sgmii_pcs;
 
-	u32			caps;
 	unsigned int		dma_burst_length;
 
 	phy_interface_t		phy_interface;
@@ -1404,9 +1419,6 @@ struct macb {
 	struct delayed_work	tx_lpi_work;
 	u32			tx_lpi_timer;
 
-	int	rx_bd_rd_prefetch;
-	int	tx_bd_rd_prefetch;
-
 	u32	rx_intr_mask;
 
 	struct macb_pm_data pm_data;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 2eddc7892073..9e35c25b7a56 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3195,6 +3195,8 @@ static int macb_open(struct net_device *netdev)
 		goto pm_exit;
 	}
 
+	bp->ctx->info = &bp->info;
+
 	/* RX buffers initialization */
 	bp->ctx->rx_buffer_size = macb_rx_buffer_size(bp, netdev->mtu);
 	bp->ctx->rx_ring_size = bp->configured_rx_ring_size;

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 09/14] net: macb: change caps helpers signatures
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

For parallel MACB contexts to become a reality, many functions will
soon not have access to `struct macb *bp`. Those will still have access
to caps through ctx->info->caps.

Change all caps helpers signatures, from taking `struct macb *bp` to
taking `u32 caps`.

Function list:

   macb_is_gem()
   gem_has_ptp()
   macb_dma64()
   macb_dma_ptp()
   macb_dma_desc_get_size()
   macb_set_addr()
   macb_get_addr()

Note: drop the `bp` parameter of macb_64b_desc(); it is unused, and it
must go because its callers macb_{set,get}_addr() no longer receive `bp`.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |  21 ++---
 drivers/net/ethernet/cadence/macb_main.c | 133 ++++++++++++++++---------------
 drivers/net/ethernet/cadence/macb_ptp.c  |   8 +-
 3 files changed, 82 insertions(+), 80 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 5ce1b1045e6a..0c11d2805848 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -840,7 +840,7 @@
  */
 #define macb_or_gem_writel(__bp, __reg, __value) \
 	({ \
-		if (macb_is_gem((__bp))) \
+		if (macb_is_gem((__bp)->caps)) \
 			gem_writel((__bp), __reg, __value); \
 		else \
 			macb_writel((__bp), __reg, __value); \
@@ -849,7 +849,7 @@
 #define macb_or_gem_readl(__bp, __reg) \
 	({ \
 		u32 __v; \
-		if (macb_is_gem((__bp))) \
+		if (macb_is_gem((__bp)->caps)) \
 			__v = gem_readl((__bp), __reg); \
 		else \
 			__v = macb_readl((__bp), __reg); \
@@ -1470,14 +1470,15 @@ static inline void gem_ptp_do_txstamp(struct macb *bp, struct sk_buff *skb, stru
 static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
 #endif
 
-static inline bool macb_is_gem(struct macb *bp)
+static inline bool macb_is_gem(u32 caps)
 {
-	return !!(bp->caps & MACB_CAPS_MACB_IS_GEM);
+	return !!(caps & MACB_CAPS_MACB_IS_GEM);
 }
 
-static inline bool gem_has_ptp(struct macb *bp)
+static inline bool gem_has_ptp(u32 caps)
 {
-	return IS_ENABLED(CONFIG_MACB_USE_HWSTAMP) && (bp->caps & MACB_CAPS_GEM_HAS_PTP);
+	return IS_ENABLED(CONFIG_MACB_USE_HWSTAMP) &&
+	       (caps & MACB_CAPS_GEM_HAS_PTP);
 }
 
 /* ENST Helper functions */
@@ -1493,16 +1494,16 @@ static inline u64 enst_max_hw_interval(u32 speed_mbps)
 			    ENST_TIME_GRANULARITY_NS * 1000, (speed_mbps));
 }
 
-static inline bool macb_dma64(struct macb *bp)
+static inline bool macb_dma64(u32 caps)
 {
 	return IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) &&
-	       bp->caps & MACB_CAPS_DMA_64B;
+	       caps & MACB_CAPS_DMA_64B;
 }
 
-static inline bool macb_dma_ptp(struct macb *bp)
+static inline bool macb_dma_ptp(u32 caps)
 {
 	return IS_ENABLED(CONFIG_MACB_USE_HWSTAMP) &&
-	       bp->caps & MACB_CAPS_DMA_PTP;
+	       caps & MACB_CAPS_DMA_PTP;
 }
 
 static inline void macb_queue_isr_clear(struct macb *bp,
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 9e35c25b7a56..f66f1a174bb4 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -126,13 +126,13 @@ struct sifive_fu540_macb_mgmt {
  *    word 5: timestamp word 1
  *    word 6: timestamp word 2
  */
-static unsigned int macb_dma_desc_get_size(struct macb *bp)
+static unsigned int macb_dma_desc_get_size(u32 caps)
 {
 	unsigned int desc_size = sizeof(struct macb_dma_desc);
 
-	if (macb_dma64(bp))
+	if (macb_dma64(caps))
 		desc_size += sizeof(struct macb_dma_desc_64);
-	if (macb_dma_ptp(bp))
+	if (macb_dma_ptp(caps))
 		desc_size += sizeof(struct macb_dma_desc_ptp);
 
 	return desc_size;
@@ -140,10 +140,10 @@ static unsigned int macb_dma_desc_get_size(struct macb *bp)
 
 static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx)
 {
-	return desc_idx * (1 + macb_dma64(bp) + macb_dma_ptp(bp));
+	return desc_idx * (1 + macb_dma64(bp->caps) + macb_dma_ptp(bp->caps));
 }
 
-static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc)
+static struct macb_dma_desc_64 *macb_64b_desc(struct macb_dma_desc *desc)
 {
 	return (struct macb_dma_desc_64 *)((void *)desc
 		+ sizeof(struct macb_dma_desc));
@@ -195,7 +195,7 @@ static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index)
 	dma_addr_t offset;
 
 	offset = macb_tx_ring_wrap(queue->bp, index) *
-			macb_dma_desc_get_size(queue->bp);
+			macb_dma_desc_get_size(queue->bp->caps);
 
 	return txq->ring_dma + offset;
 }
@@ -282,7 +282,7 @@ static void macb_set_hwaddr(struct macb *bp)
 	top = get_unaligned_le16(bp->netdev->dev_addr + 4);
 	macb_or_gem_writel(bp, SA1T, top);
 
-	if (gem_has_ptp(bp)) {
+	if (gem_has_ptp(bp->caps)) {
 		gem_writel(bp, RXPTPUNI, bottom);
 		gem_writel(bp, TXPTPUNI, bottom);
 	}
@@ -493,7 +493,7 @@ static void macb_init_buffers(struct macb *bp)
 	unsigned int q;
 
 	/* Single register for all queues' high 32 bits. */
-	if (macb_dma64(bp)) {
+	if (macb_dma64(bp->caps)) {
 		rxq = &bp->ctx->rxq[0];
 		txq = &bp->ctx->txq[0];
 		macb_writel(bp, RBQPH, upper_32_bits(rxq->ring_dma));
@@ -776,7 +776,7 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 	if (bp->caps & MACB_CAPS_MACB_IS_EMAC) {
 		if (state->interface == PHY_INTERFACE_MODE_RMII)
 			ctrl |= MACB_BIT(RM9200_RMII);
-	} else if (macb_is_gem(bp)) {
+	} else if (macb_is_gem(bp->caps)) {
 		ctrl &= ~(GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL));
 		ncr &= ~GEM_BIT(ENABLE_HS_MAC);
 
@@ -834,7 +834,7 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 	unsigned char desc[24];
 	unsigned long flags;
 
-	desc_size = macb_dma_desc_get_size(bp);
+	desc_size = macb_dma_desc_get_size(bp->caps);
 
 	if (WARN_ON_ONCE(desc_size > ARRAY_SIZE(desc)))
 		return;
@@ -941,7 +941,7 @@ static void macb_mac_link_up(struct phylink_config *config,
 
 	if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) {
 		ctrl &= ~MACB_BIT(PAE);
-		if (macb_is_gem(bp)) {
+		if (macb_is_gem(bp->caps)) {
 			ctrl &= ~GEM_BIT(GBE);
 
 			if (speed == SPEED_1000)
@@ -972,7 +972,7 @@ static void macb_mac_link_up(struct phylink_config *config,
 
 	/* Enable Rx and Tx; Enable PTP unicast */
 	ctrl = macb_readl(bp, NCR);
-	if (gem_has_ptp(bp))
+	if (gem_has_ptp(bp->caps))
 		ctrl |= MACB_BIT(PTPUNI);
 
 	macb_writel(bp, NCR, ctrl | MACB_BIT(RE) | MACB_BIT(TE));
@@ -1082,7 +1082,8 @@ static int macb_mii_probe(struct net_device *netdev)
 		  bp->phylink_config.supported_interfaces);
 
 	/* Determine what modes are supported */
-	if (macb_is_gem(bp) && (bp->caps & MACB_CAPS_GIGABIT_MODE_AVAILABLE)) {
+	if (macb_is_gem(bp->caps) &&
+	    (bp->caps & MACB_CAPS_GIGABIT_MODE_AVAILABLE)) {
 		bp->phylink_config.mac_capabilities |= MAC_1000FD;
 		if (!(bp->caps & MACB_CAPS_NO_GIGABIT_HALF))
 			bp->phylink_config.mac_capabilities |= MAC_1000HD;
@@ -1250,12 +1251,12 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budge
 	}
 }
 
-static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_t addr)
+static void macb_set_addr(u32 caps, struct macb_dma_desc *desc, dma_addr_t addr)
 {
-	if (macb_dma64(bp)) {
+	if (macb_dma64(caps)) {
 		struct macb_dma_desc_64 *desc_64;
 
-		desc_64 = macb_64b_desc(bp, desc);
+		desc_64 = macb_64b_desc(desc);
 		desc_64->addrh = upper_32_bits(addr);
 		/* The low bits of RX address contain the RX_USED bit, clearing
 		 * of which allows packet RX. Make sure the high bits are also
@@ -1267,18 +1268,18 @@ static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_
 	desc->addr = lower_32_bits(addr);
 }
 
-static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc)
+static dma_addr_t macb_get_addr(u32 caps, struct macb_dma_desc *desc)
 {
 	dma_addr_t addr = 0;
 
-	if (macb_dma64(bp)) {
+	if (macb_dma64(caps)) {
 		struct macb_dma_desc_64 *desc_64;
 
-		desc_64 = macb_64b_desc(bp, desc);
+		desc_64 = macb_64b_desc(desc);
 		addr = ((u64)(desc_64->addrh) << 32);
 	}
 	addr |= MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
-	if (macb_dma_ptp(bp))
+	if (macb_dma_ptp(caps))
 		addr &= ~GEM_BIT(DMA_RXVALID);
 	return addr;
 }
@@ -1378,7 +1379,7 @@ static void macb_tx_error_task(struct work_struct *work)
 
 	/* Set end of TX queue */
 	desc = macb_tx_desc(queue, 0);
-	macb_set_addr(bp, desc, 0);
+	macb_set_addr(bp->caps, desc, 0);
 	desc->ctrl = MACB_BIT(TX_USED);
 
 	/* Make descriptor updates visible to hardware */
@@ -1563,7 +1564,7 @@ static void gem_rx_refill(struct macb_queue *queue)
 			 * make sure ctrl is cleared first to avoid a race.
 			 */
 			dma_wmb();
-			macb_set_addr(bp, desc, paddr);
+			macb_set_addr(bp->caps, desc, paddr);
 
 			/* Properly align Ethernet header.
 			 *
@@ -1637,7 +1638,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		rmb();
 
 		rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false;
-		addr = macb_get_addr(bp, desc);
+		addr = macb_get_addr(bp->caps, desc);
 
 		if (!rxused)
 			break;
@@ -1799,7 +1800,7 @@ static inline void macb_init_rx_ring(struct macb_queue *queue)
 	addr = rxq->buffers_dma;
 	for (i = 0; i < bp->ctx->rx_ring_size; i++) {
 		desc = macb_rx_desc(queue, i);
-		macb_set_addr(bp, desc, addr);
+		macb_set_addr(bp->caps, desc, addr);
 		desc->ctrl = 0;
 		addr += bp->ctx->rx_buffer_size;
 	}
@@ -1952,7 +1953,7 @@ static void macb_tx_restart(struct macb_queue *queue)
 	if (txq->head == txq->tail)
 		goto out_tx_ptr_unlock;
 
-	tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
+	tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp->caps);
 	tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
 	head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, txq->head));
 
@@ -2129,7 +2130,7 @@ static int macb_interrupt_misc(struct macb_queue *queue, u32 status)
 	if (status & MACB_BIT(ISR_ROVR)) {
 		/* We missed at least one packet */
 		spin_lock(&bp->stats_lock);
-		if (macb_is_gem(bp))
+		if (macb_is_gem(bp->caps))
 			bp->hw_stats.gem.rx_overruns++;
 		else
 			bp->hw_stats.macb.rx_overruns++;
@@ -2143,7 +2144,7 @@ static int macb_interrupt_misc(struct macb_queue *queue, u32 status)
 		macb_queue_isr_clear(bp, queue, MACB_BIT(HRESP));
 	}
 
-	if (macb_is_gem(bp)) {
+	if (macb_is_gem(bp->caps)) {
 		if (status & GEM_BIT(WOL))
 			gem_wol_interrupt(queue, status);
 	} else {
@@ -2381,7 +2382,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 			ctrl |= MACB_BF(MSS_MFS, mss_mfs);
 
 		/* Set TX buffer descriptor */
-		macb_set_addr(bp, desc, tx_skb->mapping);
+		macb_set_addr(bp->caps, desc, tx_skb->mapping);
 		/* desc->addr must be visible to hardware before clearing
 		 * 'TX_USED' bit in desc->ctrl.
 		 */
@@ -2532,7 +2533,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb,
 		return ret;
 	}
 
-	if (macb_dma_ptp(bp) &&
+	if (macb_dma_ptp(bp->caps) &&
 	    (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
@@ -2619,7 +2620,7 @@ static unsigned int macb_rx_buffer_size(struct macb *bp, unsigned int mtu)
 {
 	unsigned int size;
 
-	if (!macb_is_gem(bp)) {
+	if (!macb_is_gem(bp->caps)) {
 		size = MACB_RX_BUFFER_SIZE;
 	} else {
 		size = mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN;
@@ -2660,7 +2661,7 @@ static void gem_free_rx_buffers(struct macb *bp)
 				continue;
 
 			desc = macb_rx_desc(queue, i);
-			addr = macb_get_addr(bp, desc);
+			addr = macb_get_addr(bp->caps, desc);
 
 			dma_unmap_single(&bp->pdev->dev, addr,
 					 bp->ctx->rx_buffer_size,
@@ -2689,13 +2690,13 @@ static void macb_free_rx_buffers(struct macb *bp)
 
 static unsigned int macb_tx_ring_size_per_queue(struct macb *bp)
 {
-	return macb_dma_desc_get_size(bp) * bp->ctx->tx_ring_size +
+	return macb_dma_desc_get_size(bp->caps) * bp->ctx->tx_ring_size +
 		bp->tx_bd_rd_prefetch;
 }
 
 static unsigned int macb_rx_ring_size_per_queue(struct macb *bp)
 {
-	return macb_dma_desc_get_size(bp) * bp->ctx->rx_ring_size +
+	return macb_dma_desc_get_size(bp->caps) * bp->ctx->rx_ring_size +
 		bp->rx_bd_rd_prefetch;
 }
 
@@ -2843,7 +2844,7 @@ static void gem_init_rings(struct macb *bp)
 		txq = &bp->ctx->txq[q];
 		for (i = 0; i < bp->ctx->tx_ring_size; i++) {
 			desc = macb_tx_desc(queue, i);
-			macb_set_addr(bp, desc, 0);
+			macb_set_addr(bp->caps, desc, 0);
 			desc->ctrl = MACB_BIT(TX_USED);
 		}
 		desc->ctrl |= MACB_BIT(TX_WRAP);
@@ -2864,7 +2865,7 @@ static void macb_init_rings(struct macb *bp)
 
 	for (i = 0; i < bp->ctx->tx_ring_size; i++) {
 		desc = macb_tx_desc(&bp->queues[0], i);
-		macb_set_addr(bp, desc, 0);
+		macb_set_addr(bp->caps, desc, 0);
 		desc->ctrl = MACB_BIT(TX_USED);
 	}
 	txq->head = 0;
@@ -2933,7 +2934,7 @@ static u32 macb_mdc_clk_div(struct macb *bp)
 	u32 config;
 	unsigned long pclk_hz;
 
-	if (macb_is_gem(bp))
+	if (macb_is_gem(bp->caps))
 		return gem_mdc_clk_div(bp);
 
 	pclk_hz = clk_get_rate(bp->pclk);
@@ -2955,7 +2956,7 @@ static u32 macb_mdc_clk_div(struct macb *bp)
  */
 static u32 macb_dbw(struct macb *bp)
 {
-	if (!macb_is_gem(bp))
+	if (!macb_is_gem(bp->caps))
 		return 0;
 
 	switch (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1))) {
@@ -2984,7 +2985,7 @@ static void macb_configure_dma(struct macb *bp)
 	u32 dmacfg;
 
 	buffer_size = bp->ctx->rx_buffer_size / RX_BUFFER_MULTIPLE;
-	if (macb_is_gem(bp)) {
+	if (macb_is_gem(bp->caps)) {
 		dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L);
 		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 			if (q)
@@ -3008,9 +3009,9 @@ static void macb_configure_dma(struct macb *bp)
 			dmacfg &= ~GEM_BIT(TXCOEN);
 
 		dmacfg &= ~GEM_BIT(ADDR64);
-		if (macb_dma64(bp))
+		if (macb_dma64(bp->caps))
 			dmacfg |= GEM_BIT(ADDR64);
-		if (macb_dma_ptp(bp))
+		if (macb_dma_ptp(bp->caps))
 			dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT);
 		netdev_dbg(bp->netdev, "Cadence configure DMA with 0x%08x\n",
 			   dmacfg);
@@ -3038,7 +3039,7 @@ static void macb_init_hw(struct macb *bp)
 		config |= MACB_BIT(BIG);	/* Receive oversized frames */
 	if (bp->netdev->flags & IFF_PROMISC)
 		config |= MACB_BIT(CAF);	/* Copy All Frames */
-	else if (macb_is_gem(bp) && bp->netdev->features & NETIF_F_RXCSUM)
+	else if (macb_is_gem(bp->caps) && bp->netdev->features & NETIF_F_RXCSUM)
 		config |= GEM_BIT(RXCOEN);
 	if (!(bp->netdev->flags & IFF_BROADCAST))
 		config |= MACB_BIT(NBC);	/* No BroadCast */
@@ -3146,14 +3147,14 @@ static void macb_set_rx_mode(struct net_device *netdev)
 		cfg |= MACB_BIT(CAF);
 
 		/* Disable RX checksum offload */
-		if (macb_is_gem(bp))
+		if (macb_is_gem(bp->caps))
 			cfg &= ~GEM_BIT(RXCOEN);
 	} else {
 		/* Disable promiscuous mode */
 		cfg &= ~MACB_BIT(CAF);
 
 		/* Enable RX checksum offload only if requested */
-		if (macb_is_gem(bp) && netdev->features & NETIF_F_RXCSUM)
+		if (macb_is_gem(bp->caps) && netdev->features & NETIF_F_RXCSUM)
 			cfg |= GEM_BIT(RXCOEN);
 	}
 
@@ -3436,7 +3437,7 @@ static void macb_get_stats(struct net_device *netdev,
 	struct macb_stats *hwstat = &bp->hw_stats.macb;
 
 	netdev_stats_to_stats64(nstat, &bp->netdev->stats);
-	if (macb_is_gem(bp)) {
+	if (macb_is_gem(bp->caps)) {
 		gem_get_stats(bp, nstat);
 		return;
 	}
@@ -3684,7 +3685,7 @@ static void macb_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 
 	if (!(bp->caps & MACB_CAPS_USRIO_DISABLED))
 		regs_buff[12] = macb_or_gem_readl(bp, USRIO);
-	if (macb_is_gem(bp))
+	if (macb_is_gem(bp->caps))
 		regs_buff[13] = gem_readl(bp, DMACFG);
 }
 
@@ -3816,7 +3817,7 @@ static int gem_get_ts_info(struct net_device *netdev,
 {
 	struct macb *bp = netdev_priv(netdev);
 
-	if (!macb_dma_ptp(bp)) {
+	if (!macb_dma_ptp(bp->caps)) {
 		ethtool_op_get_ts_info(netdev, info);
 		return 0;
 	}
@@ -3917,7 +3918,7 @@ static void gem_prog_cmp_regs(struct macb *bp, struct ethtool_rx_flow_spec *fs)
 	bool cmp_b = false;
 	bool cmp_c = false;
 
-	if (!macb_is_gem(bp))
+	if (!macb_is_gem(bp->caps))
 		return;
 
 	tp4sp_v = &(fs->h_u.tcp_ip4_spec);
@@ -4278,7 +4279,7 @@ static inline void macb_set_txcsum_feature(struct macb *bp,
 {
 	u32 val;
 
-	if (!macb_is_gem(bp))
+	if (!macb_is_gem(bp->caps))
 		return;
 
 	val = gem_readl(bp, DMACFG);
@@ -4296,7 +4297,7 @@ static inline void macb_set_rxcsum_feature(struct macb *bp,
 	struct net_device *netdev = bp->netdev;
 	u32 val;
 
-	if (!macb_is_gem(bp))
+	if (!macb_is_gem(bp->caps))
 		return;
 
 	val = gem_readl(bp, NCFGR);
@@ -4311,7 +4312,7 @@ static inline void macb_set_rxcsum_feature(struct macb *bp,
 static inline void macb_set_rxflow_feature(struct macb *bp,
 					   netdev_features_t features)
 {
-	if (!macb_is_gem(bp))
+	if (!macb_is_gem(bp->caps))
 		return;
 
 	gem_enable_flow_filters(bp, !!(features & NETIF_F_NTUPLE));
@@ -4630,7 +4631,7 @@ static void macb_configure_caps(struct macb *bp,
 			bp->caps |= MACB_CAPS_FIFO_MODE;
 		if (GEM_BFEXT(PBUF_RSC, gem_readl(bp, DCFG6)))
 			bp->caps |= MACB_CAPS_RSC;
-		if (gem_has_ptp(bp)) {
+		if (gem_has_ptp(bp->caps)) {
 			if (!GEM_BFEXT(TSU, gem_readl(bp, DCFG5)))
 				dev_err(&bp->pdev->dev,
 					"GEM doesn't support hardware ptp.\n");
@@ -4842,7 +4843,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 	netdev->netdev_ops = &macb_netdev_ops;
 
 	/* setup appropriated routines according to adapter type */
-	if (macb_is_gem(bp)) {
+	if (macb_is_gem(bp->caps)) {
 		bp->macbgem_ops.mog_alloc_rx_buffers = gem_alloc_rx_buffers;
 		bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers;
 		bp->macbgem_ops.mog_init_rings = gem_init_rings;
@@ -4871,7 +4872,7 @@ static int macb_init_dflt(struct platform_device *pdev)
 		netdev->hw_features |= MACB_NETIF_LSO;
 
 	/* Checksum offload is only available on gem with packet buffer */
-	if (macb_is_gem(bp) && !(bp->caps & MACB_CAPS_FIFO_MODE))
+	if (macb_is_gem(bp->caps) && !(bp->caps & MACB_CAPS_FIFO_MODE))
 		netdev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	if (bp->caps & MACB_CAPS_SG_DISABLED)
 		netdev->hw_features &= ~NETIF_F_SG;
@@ -4997,7 +4998,7 @@ static int at91ether_alloc_coherent(struct macb *bp)
 
 	rxq->ring = dma_alloc_coherent(&bp->pdev->dev,
 				       (AT91ETHER_MAX_RX_DESCR *
-					macb_dma_desc_get_size(bp)),
+					macb_dma_desc_get_size(bp->caps)),
 				       &rxq->ring_dma, GFP_KERNEL);
 	if (!rxq->ring)
 		return -ENOMEM;
@@ -5010,7 +5011,7 @@ static int at91ether_alloc_coherent(struct macb *bp)
 	if (!rxq->buffers) {
 		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
-				  macb_dma_desc_get_size(bp),
+				  macb_dma_desc_get_size(bp->caps),
 				  rxq->ring, rxq->ring_dma);
 		rxq->ring = NULL;
 		return -ENOMEM;
@@ -5026,7 +5027,7 @@ static void at91ether_free_coherent(struct macb *bp)
 	if (rxq->ring) {
 		dma_free_coherent(&bp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
-				  macb_dma_desc_get_size(bp),
+				  macb_dma_desc_get_size(bp->caps),
 				  rxq->ring, rxq->ring_dma);
 		rxq->ring = NULL;
 	}
@@ -5057,7 +5058,7 @@ static int at91ether_start(struct macb *bp)
 	addr = rxq->buffers_dma;
 	for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) {
 		desc = macb_rx_desc(queue, i);
-		macb_set_addr(bp, desc, addr);
+		macb_set_addr(bp->caps, desc, addr);
 		desc->ctrl = 0;
 		addr += AT91ETHER_MAX_RBUFF_SZ;
 	}
@@ -5572,13 +5573,13 @@ static int macb_alloc_tieoff(struct macb *bp)
 		return 0;
 
 	bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
-						macb_dma_desc_get_size(bp),
+						macb_dma_desc_get_size(bp->caps),
 						&bp->rx_ring_tieoff_dma,
 						GFP_KERNEL);
 	if (!bp->rx_ring_tieoff)
 		return -ENOMEM;
 
-	macb_set_addr(bp, bp->rx_ring_tieoff,
+	macb_set_addr(bp->caps, bp->rx_ring_tieoff,
 		      MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
 
 	bp->rx_ring_tieoff->ctrl = 0;
@@ -5591,7 +5592,7 @@ static void macb_free_tieoff(struct macb *bp)
 	if (!bp->rx_ring_tieoff)
 		return;
 
-	dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp),
+	dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp->caps),
 			  bp->rx_ring_tieoff,
 			  bp->rx_ring_tieoff_dma);
 	bp->rx_ring_tieoff = NULL;
@@ -5972,12 +5973,12 @@ static int macb_probe(struct platform_device *pdev)
 		val = GEM_BFEXT(RXBD_RDBUFF, gem_readl(bp, DCFG10));
 		if (val)
 			bp->rx_bd_rd_prefetch = (2 << (val - 1)) *
-						macb_dma_desc_get_size(bp);
+						macb_dma_desc_get_size(bp->caps);
 
 		val = GEM_BFEXT(TXBD_RDBUFF, gem_readl(bp, DCFG10));
 		if (val)
 			bp->tx_bd_rd_prefetch = (2 << (val - 1)) *
-						macb_dma_desc_get_size(bp);
+						macb_dma_desc_get_size(bp->caps);
 	}
 
 	bp->rx_intr_mask = MACB_RX_INT_FLAGS;
@@ -6022,7 +6023,7 @@ static int macb_probe(struct platform_device *pdev)
 	INIT_DELAYED_WORK(&bp->tx_lpi_work, macb_tx_lpi_work_fn);
 
 	netdev_info(netdev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
-		    macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID),
+		    macb_is_gem(bp->caps) ? "GEM" : "MACB", macb_readl(bp, MID),
 		    netdev->base_addr, netdev->irq, netdev->dev_addr);
 
 	pm_runtime_put_autosuspend(&bp->pdev->dev);
@@ -6150,7 +6151,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
 			tmp |= MACB_BFEXT(IP, ifa_local);
 		}
 
-		if (macb_is_gem(bp)) {
+		if (macb_is_gem(bp->caps)) {
 			queue_writel(bp->queues, IER, GEM_BIT(WOL));
 			gem_writel(bp, WOL, tmp);
 		} else {
@@ -6212,7 +6213,7 @@ static int __maybe_unused macb_resume(struct device *dev)
 	if (bp->wol & MACB_WOL_ENABLED) {
 		spin_lock_irqsave(&bp->lock, flags);
 		/* Disable WoL */
-		if (macb_is_gem(bp)) {
+		if (macb_is_gem(bp->caps)) {
 			queue_writel(bp->queues, IDR, GEM_BIT(WOL));
 			gem_writel(bp, WOL, 0);
 		} else {
@@ -6240,7 +6241,7 @@ static int __maybe_unused macb_resume(struct device *dev)
 	for (q = 0, queue = bp->queues; q < bp->num_queues;
 	     ++q, ++queue) {
 		if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) {
-			if (macb_is_gem(bp))
+			if (macb_is_gem(bp->caps))
 				gem_init_rx_ring(queue);
 			else
 				macb_init_rx_ring(queue);
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index e5195d7dac1d..2070508fd2e0 100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -28,10 +28,10 @@
 static struct macb_dma_desc_ptp *macb_ptp_desc(struct macb *bp,
 					       struct macb_dma_desc *desc)
 {
-	if (!macb_dma_ptp(bp))
+	if (!macb_dma_ptp(bp->caps))
 		return NULL;
 
-	if (macb_dma64(bp))
+	if (macb_dma64(bp->caps))
 		return (struct macb_dma_desc_ptp *)
 				((u8 *)desc + sizeof(struct macb_dma_desc)
 				+ sizeof(struct macb_dma_desc_64));
@@ -384,7 +384,7 @@ int gem_get_hwtst(struct net_device *netdev,
 	struct macb *bp = netdev_priv(netdev);
 
 	*tstamp_config = bp->tstamp_config;
-	if (!macb_dma_ptp(bp))
+	if (!macb_dma_ptp(bp->caps))
 		return -EOPNOTSUPP;
 
 	return 0;
@@ -411,7 +411,7 @@ int gem_set_hwtst(struct net_device *netdev,
 	struct macb *bp = netdev_priv(netdev);
 	u32 regval;
 
-	if (!macb_dma_ptp(bp))
+	if (!macb_dma_ptp(bp->caps))
 		return -EOPNOTSUPP;
 
 	switch (tstamp_config->tx_type) {

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 10/14] net: macb: change function signatures to take contexts
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

For parallel MACB contexts to become a reality, many functions need
to stop operating on bp->ctx (the currently active context) and instead
work on a context that is passed to them. That context might be
(1) the new one that is getting allocated and initialised, or
(2) the old one to be freed.

To reduce bug surface area, restrict those functions to take *only* a
context `struct macb_context *ctx` and no `struct macb *bp`. That way,
they cannot mistakenly use `bp->ctx` where `ctx` was intended.

We also convert functions that take a `struct macb_queue *queue` to
instead take `struct macb_context *ctx, unsigned int q`, with q
indexing ctx->txq[] and ctx->rxq[].

Full list:

   macb_adj_dma_desc_idx()
   macb_tx_ring_wrap()
   macb_tx_desc()
   macb_rx_ring_wrap()
   macb_rx_desc()
   gem_rx_refill()
   macb_init_rx_ring()
   gem_free_rx_buffers()
   macb_free_rx_buffers()
   macb_tx_ring_size_per_queue()
   macb_rx_ring_size_per_queue()
   macb_free_consistent()
   gem_alloc_rx_buffers()
   macb_alloc_rx_buffers()
   macb_alloc_consistent()
   gem_init_rx_ring()
   gem_init_rings()
   macb_init_rings()

Note about gem_rx_refill(): it ends with a netdev_vdbg() that prints the
queue pointer. Change it to print the queue index instead, because the
function no longer has access to the queue pointer.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb.h      |   7 +-
 drivers/net/ethernet/cadence/macb_main.c | 372 ++++++++++++++++---------------
 2 files changed, 202 insertions(+), 177 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 0c11d2805848..bc55a54ac9b7 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1196,11 +1196,12 @@ static const struct gem_statistic queue_statistics[] = {
 
 struct macb;
 struct macb_queue;
+struct macb_context;
 
 struct macb_or_gem_ops {
-	int	(*mog_alloc_rx_buffers)(struct macb *bp);
-	void	(*mog_free_rx_buffers)(struct macb *bp);
-	void	(*mog_init_rings)(struct macb *bp);
+	int	(*mog_alloc_rx_buffers)(struct macb_context *ctx);
+	void	(*mog_free_rx_buffers)(struct macb_context *ctx);
+	void	(*mog_init_rings)(struct macb_context *ctx);
 	int	(*mog_rx)(struct macb_queue *queue, struct napi_struct *napi,
 			  int budget);
 };
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index f66f1a174bb4..71d60d8d1993 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -138,9 +138,11 @@ static unsigned int macb_dma_desc_get_size(u32 caps)
 	return desc_size;
 }
 
-static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx)
+static unsigned int macb_adj_dma_desc_idx(struct macb_context *ctx,
+					  unsigned int desc_idx)
 {
-	return desc_idx * (1 + macb_dma64(bp->caps) + macb_dma_ptp(bp->caps));
+	return desc_idx * (1 + macb_dma64(ctx->info->caps) +
+			       macb_dma_ptp(ctx->info->caps));
 }
 
 static struct macb_dma_desc_64 *macb_64b_desc(struct macb_dma_desc *desc)
@@ -150,9 +152,10 @@ static struct macb_dma_desc_64 *macb_64b_desc(struct macb_dma_desc *desc)
 }
 
 /* Ring buffer accessors */
-static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index)
+static unsigned int macb_tx_ring_wrap(struct macb_context *ctx,
+				      unsigned int index)
 {
-	return index & (bp->ctx->tx_ring_size - 1);
+	return index & (ctx->tx_ring_size - 1);
 }
 
 static struct macb_txq *macb_txq(struct macb_queue *queue)
@@ -171,14 +174,13 @@ static struct macb_rxq *macb_rxq(struct macb_queue *queue)
 	return &bp->ctx->rxq[q];
 }
 
-static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue,
+static struct macb_dma_desc *macb_tx_desc(struct macb_context *ctx,
+					  unsigned int q,
 					  unsigned int index)
 {
-	struct macb_txq *txq = macb_txq(queue);
-
-	index = macb_tx_ring_wrap(queue->bp, index);
-	index = macb_adj_dma_desc_idx(queue->bp, index);
-	return &txq->ring[index];
+	index = macb_tx_ring_wrap(ctx, index);
+	index = macb_adj_dma_desc_idx(ctx, index);
+	return &ctx->txq[q].ring[index];
 }
 
 static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue,
@@ -186,40 +188,42 @@ static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue,
 {
 	struct macb_txq *txq = macb_txq(queue);
 
-	return &txq->skb[macb_tx_ring_wrap(queue->bp, index)];
+	return &txq->skb[macb_tx_ring_wrap(queue->bp->ctx, index)];
 }
 
 static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index)
 {
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb_txq *txq = macb_txq(queue);
 	dma_addr_t offset;
 
-	offset = macb_tx_ring_wrap(queue->bp, index) *
+	offset = macb_tx_ring_wrap(ctx, index) *
 			macb_dma_desc_get_size(queue->bp->caps);
 
 	return txq->ring_dma + offset;
 }
 
-static unsigned int macb_rx_ring_wrap(struct macb *bp, unsigned int index)
+static unsigned int macb_rx_ring_wrap(struct macb_context *ctx,
+				      unsigned int index)
 {
-	return index & (bp->ctx->rx_ring_size - 1);
+	return index & (ctx->rx_ring_size - 1);
 }
 
-static struct macb_dma_desc *macb_rx_desc(struct macb_queue *queue, unsigned int index)
+static struct macb_dma_desc *macb_rx_desc(struct macb_context *ctx,
+					  unsigned int q, unsigned int index)
 {
-	struct macb_rxq *rxq = macb_rxq(queue);
-
-	index = macb_rx_ring_wrap(queue->bp, index);
-	index = macb_adj_dma_desc_idx(queue->bp, index);
-	return &rxq->ring[index];
+	index = macb_rx_ring_wrap(ctx, index);
+	index = macb_adj_dma_desc_idx(ctx, index);
+	return &ctx->rxq[q].ring[index];
 }
 
 static void *macb_rx_buffer(struct macb_queue *queue, unsigned int index)
 {
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb_rxq *rxq = macb_rxq(queue);
 
-	return rxq->buffers + queue->bp->ctx->rx_buffer_size *
-	       macb_rx_ring_wrap(queue->bp, index);
+	return rxq->buffers + ctx->rx_buffer_size *
+	       macb_rx_ring_wrap(ctx, index);
 }
 
 /* I/O accessors */
@@ -828,13 +832,14 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 	unsigned int head, tail, count, ring_size, desc_size;
 	struct macb_tx_skb tx_skb, *skb_curr, *skb_next;
 	struct macb_dma_desc *desc_curr, *desc_next;
+	unsigned int q = queue - queue->bp->queues;
 	unsigned int i, cycles, shift, curr, next;
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb_txq *txq = macb_txq(queue);
-	struct macb *bp = queue->bp;
 	unsigned char desc[24];
 	unsigned long flags;
 
-	desc_size = macb_dma_desc_get_size(bp->caps);
+	desc_size = macb_dma_desc_get_size(queue->bp->caps);
 
 	if (WARN_ON_ONCE(desc_size > ARRAY_SIZE(desc)))
 		return;
@@ -842,7 +847,7 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
 	head = txq->head;
 	tail = txq->tail;
-	ring_size = bp->ctx->tx_ring_size;
+	ring_size = ctx->tx_ring_size;
 	count = CIRC_CNT(head, tail, ring_size);
 
 	if (!(tail % ring_size))
@@ -858,7 +863,7 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 	cycles = gcd(ring_size, shift);
 
 	for (i = 0; i < cycles; i++) {
-		memcpy(&desc, macb_tx_desc(queue, i), desc_size);
+		memcpy(&desc, macb_tx_desc(ctx, q, i), desc_size);
 		memcpy(&tx_skb, macb_tx_skb(queue, i),
 		       sizeof(struct macb_tx_skb));
 
@@ -866,8 +871,8 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 		next = (curr + shift) % ring_size;
 
 		while (next != i) {
-			desc_curr = macb_tx_desc(queue, curr);
-			desc_next = macb_tx_desc(queue, next);
+			desc_curr = macb_tx_desc(ctx, q, curr);
+			desc_next = macb_tx_desc(ctx, q, next);
 
 			memcpy(desc_curr, desc_next, desc_size);
 
@@ -884,7 +889,7 @@ static void gem_shuffle_tx_one_ring(struct macb_queue *queue)
 			next = (curr + shift) % ring_size;
 		}
 
-		desc_curr = macb_tx_desc(queue, curr);
+		desc_curr = macb_tx_desc(ctx, q, curr);
 		memcpy(desc_curr, &desc, desc_size);
 		if (i == ring_size - 1)
 			desc_curr->ctrl &= ~MACB_BIT(TX_WRAP);
@@ -1268,18 +1273,19 @@ static void macb_set_addr(u32 caps, struct macb_dma_desc *desc, dma_addr_t addr)
 	desc->addr = lower_32_bits(addr);
 }
 
-static dma_addr_t macb_get_addr(u32 caps, struct macb_dma_desc *desc)
+static dma_addr_t macb_get_addr(struct macb_context *ctx,
+				struct macb_dma_desc *desc)
 {
 	dma_addr_t addr = 0;
 
-	if (macb_dma64(caps)) {
+	if (macb_dma64(ctx->info->caps)) {
 		struct macb_dma_desc_64 *desc_64;
 
 		desc_64 = macb_64b_desc(desc);
 		addr = ((u64)(desc_64->addrh) << 32);
 	}
 	addr |= MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
-	if (macb_dma_ptp(caps))
+	if (macb_dma_ptp(ctx->info->caps))
 		addr &= ~GEM_BIT(DMA_RXVALID);
 	return addr;
 }
@@ -1289,6 +1295,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	struct macb_queue *queue = container_of(work, struct macb_queue,
 						tx_error_task);
 	unsigned int q = queue - queue->bp->queues;
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb_txq *txq = macb_txq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_tx_skb *tx_skb;
@@ -1331,7 +1338,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	for (tail = txq->tail; tail != txq->head; tail++) {
 		u32	ctrl;
 
-		desc = macb_tx_desc(queue, tail);
+		desc = macb_tx_desc(ctx, q, tail);
 		ctrl = desc->ctrl;
 		tx_skb = macb_tx_skb(queue, tail);
 		skb = tx_skb->skb;
@@ -1350,7 +1357,7 @@ static void macb_tx_error_task(struct work_struct *work)
 			 */
 			if (!(ctrl & MACB_BIT(TX_BUF_EXHAUSTED))) {
 				netdev_vdbg(bp->netdev, "txerr skb %u (data %p) TX complete\n",
-					    macb_tx_ring_wrap(bp, tail),
+					    macb_tx_ring_wrap(ctx, tail),
 					    skb->data);
 				bp->netdev->stats.tx_packets++;
 				queue->stats.tx_packets++;
@@ -1378,7 +1385,7 @@ static void macb_tx_error_task(struct work_struct *work)
 				  packets, bytes);
 
 	/* Set end of TX queue */
-	desc = macb_tx_desc(queue, 0);
+	desc = macb_tx_desc(ctx, q, 0);
 	macb_set_addr(bp->caps, desc, 0);
 	desc->ctrl = MACB_BIT(TX_USED);
 
@@ -1441,6 +1448,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 	struct macb *bp = queue->bp;
 	struct macb_txq *txq = macb_txq(queue);
 	unsigned int q = queue - bp->queues;
+	struct macb_context *ctx = bp->ctx;
 	unsigned long flags;
 	unsigned int tail;
 	unsigned int head;
@@ -1455,7 +1463,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 		struct macb_dma_desc	*desc;
 		u32			ctrl;
 
-		desc = macb_tx_desc(queue, tail);
+		desc = macb_tx_desc(ctx, q, tail);
 
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
@@ -1480,7 +1488,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 					gem_ptp_do_txstamp(bp, skb, desc);
 
 				netdev_vdbg(bp->netdev, "skb %u (data %p) TX complete\n",
-					    macb_tx_ring_wrap(bp, tail),
+					    macb_tx_ring_wrap(ctx, tail),
 					    skb->data);
 				bp->netdev->stats.tx_packets++;
 				queue->stats.tx_packets++;
@@ -1518,53 +1526,53 @@ static int macb_tx_complete(struct macb_queue *queue, int budget)
 	return packets;
 }
 
-static void gem_rx_refill(struct macb_queue *queue)
+static void gem_rx_refill(struct macb_context *ctx, unsigned int q)
 {
-	struct macb_rxq *rxq = macb_rxq(queue);
-	struct macb *bp = queue->bp;
+	struct device *dev = &ctx->info->pdev->dev;
+	struct macb_rxq *rxq = &ctx->rxq[q];
 	struct macb_dma_desc *desc;
 	struct sk_buff *skb;
 	unsigned int entry;
 	dma_addr_t paddr;
 
 	while (CIRC_SPACE(rxq->prepared_head, rxq->tail,
-			  bp->ctx->rx_ring_size) > 0) {
-		entry = macb_rx_ring_wrap(bp, rxq->prepared_head);
+			  ctx->rx_ring_size) > 0) {
+		entry = macb_rx_ring_wrap(ctx, rxq->prepared_head);
 
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
-		desc = macb_rx_desc(queue, entry);
+		desc = macb_rx_desc(ctx, q, entry);
 
 		if (!rxq->skbuff[entry]) {
 			/* allocate sk_buff for this free entry in ring */
-			skb = netdev_alloc_skb(bp->netdev,
-					       bp->ctx->rx_buffer_size);
+			skb = netdev_alloc_skb(ctx->info->netdev,
+					       ctx->rx_buffer_size);
 			if (unlikely(!skb)) {
-				netdev_err(bp->netdev,
+				netdev_err(ctx->info->netdev,
 					   "Unable to allocate sk_buff\n");
 				break;
 			}
 
 			/* now fill corresponding descriptor entry */
-			paddr = dma_map_single(&bp->pdev->dev, skb->data,
-					       bp->ctx->rx_buffer_size,
+			paddr = dma_map_single(dev, skb->data,
+					       ctx->rx_buffer_size,
 					       DMA_FROM_DEVICE);
-			if (dma_mapping_error(&bp->pdev->dev, paddr)) {
+			if (dma_mapping_error(dev, paddr)) {
 				dev_kfree_skb(skb);
 				break;
 			}
 
 			rxq->skbuff[entry] = skb;
 
-			if (entry == bp->ctx->rx_ring_size - 1)
+			if (entry == ctx->rx_ring_size - 1)
 				paddr |= MACB_BIT(RX_WRAP);
 			desc->ctrl = 0;
 			/* Setting addr clears RX_USED and allows reception,
 			 * make sure ctrl is cleared first to avoid a race.
 			 */
 			dma_wmb();
-			macb_set_addr(bp->caps, desc, paddr);
+			macb_set_addr(ctx->info->caps, desc, paddr);
 
 			/* Properly align Ethernet header.
 			 *
@@ -1577,7 +1585,7 @@ static void gem_rx_refill(struct macb_queue *queue)
 			 * setting the low 2/3 bits.
 			 * It is 3 bits if HW_DMA_CAP_PTP, else 2 bits.
 			 */
-			if (!(bp->caps & MACB_CAPS_RSC))
+			if (!(ctx->info->caps & MACB_CAPS_RSC))
 				skb_reserve(skb, NET_IP_ALIGN);
 		} else {
 			desc->ctrl = 0;
@@ -1590,18 +1598,21 @@ static void gem_rx_refill(struct macb_queue *queue)
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
-	netdev_vdbg(bp->netdev, "rx ring: queue: %p, prepared head %d, tail %d\n",
-		    queue, rxq->prepared_head, rxq->tail);
+	netdev_vdbg(ctx->info->netdev,
+		    "rx ring: queue: %u, prepared head %d, tail %d\n",
+		    q, rxq->prepared_head, rxq->tail);
 }
 
 /* Mark DMA descriptors from begin up to and not including end as unused */
 static void discard_partial_frame(struct macb_queue *queue, unsigned int begin,
 				  unsigned int end)
 {
+	unsigned int q = queue - queue->bp->queues;
+	struct macb_context *ctx = queue->bp->ctx;
 	unsigned int frag;
 
 	for (frag = begin; frag != end; frag++) {
-		struct macb_dma_desc *desc = macb_rx_desc(queue, frag);
+		struct macb_dma_desc *desc = macb_rx_desc(ctx, q, frag);
 
 		desc->addr &= ~MACB_BIT(RX_USED);
 	}
@@ -1618,6 +1629,8 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin,
 static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		  int budget)
 {
+	unsigned int q = queue - queue->bp->queues;
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
@@ -1631,14 +1644,14 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		dma_addr_t addr;
 		bool rxused;
 
-		entry = macb_rx_ring_wrap(bp, rxq->tail);
-		desc = macb_rx_desc(queue, entry);
+		entry = macb_rx_ring_wrap(ctx, rxq->tail);
+		desc = macb_rx_desc(ctx, q, entry);
 
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
 		rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false;
-		addr = macb_get_addr(bp->caps, desc);
+		addr = macb_get_addr(ctx, desc);
 
 		if (!rxused)
 			break;
@@ -1702,7 +1715,7 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 		napi_gro_receive(napi, skb);
 	}
 
-	gem_rx_refill(queue);
+	gem_rx_refill(ctx, q);
 
 	return count;
 }
@@ -1710,6 +1723,8 @@ static int gem_rx(struct macb_queue *queue, struct napi_struct *napi,
 static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 			 unsigned int first_frag, unsigned int last_frag)
 {
+	unsigned int q = queue - queue->bp->queues;
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
 	unsigned int offset;
@@ -1717,12 +1732,12 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	unsigned int frag;
 	unsigned int len;
 
-	desc = macb_rx_desc(queue, last_frag);
+	desc = macb_rx_desc(ctx, q, last_frag);
 	len = desc->ctrl & bp->rx_frm_len_mask;
 
 	netdev_vdbg(bp->netdev, "macb_rx_frame frags %u - %u (len %u)\n",
-		    macb_rx_ring_wrap(bp, first_frag),
-		    macb_rx_ring_wrap(bp, last_frag), len);
+		    macb_rx_ring_wrap(ctx, first_frag),
+		    macb_rx_ring_wrap(ctx, last_frag), len);
 
 	/* The ethernet header starts NET_IP_ALIGN bytes into the
 	 * first buffer. Since the header is 14 bytes, this makes the
@@ -1736,7 +1751,7 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	if (!skb) {
 		bp->netdev->stats.rx_dropped++;
 		for (frag = first_frag; ; frag++) {
-			desc = macb_rx_desc(queue, frag);
+			desc = macb_rx_desc(ctx, q, frag);
 			desc->addr &= ~MACB_BIT(RX_USED);
 			if (frag == last_frag)
 				break;
@@ -1767,7 +1782,7 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 					       macb_rx_buffer(queue, frag),
 					       frag_len);
 		offset += bp->ctx->rx_buffer_size;
-		desc = macb_rx_desc(queue, frag);
+		desc = macb_rx_desc(ctx, q, frag);
 		desc->addr &= ~MACB_BIT(RX_USED);
 
 		if (frag == last_frag)
@@ -1789,20 +1804,19 @@ static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi,
 	return 0;
 }
 
-static inline void macb_init_rx_ring(struct macb_queue *queue)
+static inline void macb_init_rx_ring(struct macb_context *ctx, unsigned int q)
 {
-	struct macb_rxq *rxq = macb_rxq(queue);
+	struct macb_rxq *rxq = &ctx->rxq[q];
 	struct macb_dma_desc *desc = NULL;
-	struct macb *bp = queue->bp;
 	dma_addr_t addr;
 	int i;
 
 	addr = rxq->buffers_dma;
-	for (i = 0; i < bp->ctx->rx_ring_size; i++) {
-		desc = macb_rx_desc(queue, i);
-		macb_set_addr(bp->caps, desc, addr);
+	for (i = 0; i < ctx->rx_ring_size; i++) {
+		desc = macb_rx_desc(ctx, q, i);
+		macb_set_addr(ctx->info->caps, desc, addr);
 		desc->ctrl = 0;
-		addr += bp->ctx->rx_buffer_size;
+		addr += ctx->rx_buffer_size;
 	}
 	desc->addr |= MACB_BIT(RX_WRAP);
 	rxq->tail = 0;
@@ -1811,6 +1825,8 @@ static inline void macb_init_rx_ring(struct macb_queue *queue)
 static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 		   int budget)
 {
+	unsigned int q = queue - queue->bp->queues;
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb_rxq *rxq = macb_rxq(queue);
 	struct macb *bp = queue->bp;
 	bool reset_rx_queue = false;
@@ -1819,7 +1835,7 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 	int received = 0;
 
 	for (tail = rxq->tail; budget > 0; tail++) {
-		struct macb_dma_desc *desc = macb_rx_desc(queue, tail);
+		struct macb_dma_desc *desc = macb_rx_desc(ctx, q, tail);
 		u32 ctrl;
 
 		/* Make hw descriptor updates visible to CPU */
@@ -1871,7 +1887,7 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 		ctrl = macb_readl(bp, NCR);
 		macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE));
 
-		macb_init_rx_ring(queue);
+		macb_init_rx_ring(ctx, q);
 		queue_writel(queue, RBQP, rxq->ring_dma);
 
 		macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
@@ -1890,13 +1906,14 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
 
 static bool macb_rx_pending(struct macb_queue *queue)
 {
+	unsigned int q = queue - queue->bp->queues;
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb_rxq *rxq = macb_rxq(queue);
-	struct macb *bp = queue->bp;
 	struct macb_dma_desc *desc;
 	unsigned int entry;
 
-	entry = macb_rx_ring_wrap(bp, rxq->tail);
-	desc = macb_rx_desc(queue, entry);
+	entry = macb_rx_ring_wrap(ctx, rxq->tail);
+	desc = macb_rx_desc(ctx, q, entry);
 
 	/* Make hw descriptor updates visible to CPU */
 	rmb();
@@ -1943,6 +1960,7 @@ static int macb_rx_poll(struct napi_struct *napi, int budget)
 
 static void macb_tx_restart(struct macb_queue *queue)
 {
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb_txq *txq = macb_txq(queue);
 	struct macb *bp = queue->bp;
 	unsigned int head_idx, tbqp;
@@ -1953,9 +1971,9 @@ static void macb_tx_restart(struct macb_queue *queue)
 	if (txq->head == txq->tail)
 		goto out_tx_ptr_unlock;
 
-	tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp->caps);
-	tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
-	head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, txq->head));
+	tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(ctx->info->caps);
+	tbqp = macb_adj_dma_desc_idx(ctx, macb_tx_ring_wrap(ctx, tbqp));
+	head_idx = macb_adj_dma_desc_idx(ctx, macb_tx_ring_wrap(ctx, txq->head));
 
 	if (tbqp == head_idx)
 		goto out_tx_ptr_unlock;
@@ -1970,6 +1988,8 @@ static void macb_tx_restart(struct macb_queue *queue)
 
 static bool macb_tx_complete_pending(struct macb_queue *queue)
 {
+	unsigned int q = queue - queue->bp->queues;
+	struct macb_context *ctx = queue->bp->ctx;
 	struct macb_txq *txq = macb_txq(queue);
 	bool retval = false;
 	unsigned long flags;
@@ -1979,7 +1999,7 @@ static bool macb_tx_complete_pending(struct macb_queue *queue)
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
-		if (macb_tx_desc(queue, txq->tail)->ctrl & MACB_BIT(TX_USED))
+		if (macb_tx_desc(ctx, q, txq->tail)->ctrl & MACB_BIT(TX_USED))
 			retval = true;
 	}
 	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
@@ -2032,6 +2052,7 @@ static void macb_hresp_error_task(struct work_struct *work)
 {
 	struct macb *bp = from_work(bp, work, hresp_err_bh_work);
 	struct net_device *netdev = bp->netdev;
+	struct macb_context *ctx = bp->ctx;
 	struct macb_queue *queue;
 	unsigned int q;
 	u32 ctrl;
@@ -2048,7 +2069,7 @@ static void macb_hresp_error_task(struct work_struct *work)
 	netif_tx_stop_all_queues(netdev);
 	netif_carrier_off(netdev);
 
-	bp->macbgem_ops.mog_init_rings(bp);
+	bp->macbgem_ops.mog_init_rings(ctx);
 
 	/* Initialize TX and RX buffers */
 	macb_init_buffers(bp);
@@ -2245,6 +2266,8 @@ static unsigned int macb_tx_map(struct macb *bp,
 	unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags;
 	unsigned int len, i, tx_head = txq->head;
 	u32 ctrl, lso_ctrl = 0, seq_ctrl = 0;
+	unsigned int q = queue - bp->queues;
+	struct macb_context *ctx = bp->ctx;
 	unsigned int eof = 1, mss_mfs = 0;
 	struct macb_tx_skb *tx_skb = NULL;
 	struct macb_dma_desc *desc;
@@ -2335,7 +2358,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 	 */
 	i = tx_head;
 	ctrl = MACB_BIT(TX_USED);
-	desc = macb_tx_desc(queue, i);
+	desc = macb_tx_desc(ctx, q, i);
 	desc->ctrl = ctrl;
 
 	if (lso_ctrl) {
@@ -2356,14 +2379,14 @@ static unsigned int macb_tx_map(struct macb *bp,
 	do {
 		i--;
 		tx_skb = macb_tx_skb(queue, i);
-		desc = macb_tx_desc(queue, i);
+		desc = macb_tx_desc(ctx, q, i);
 
 		ctrl = (u32)tx_skb->size;
 		if (eof) {
 			ctrl |= MACB_BIT(TX_LAST);
 			eof = 0;
 		}
-		if (unlikely(macb_tx_ring_wrap(bp, i) ==
+		if (unlikely(macb_tx_ring_wrap(ctx, i) ==
 				bp->ctx->tx_ring_size - 1))
 			ctrl |= MACB_BIT(TX_WRAP);
 
@@ -2638,33 +2661,32 @@ static unsigned int macb_rx_buffer_size(struct macb *bp, unsigned int mtu)
 	return size;
 }
 
-static void gem_free_rx_buffers(struct macb *bp)
+static void gem_free_rx_buffers(struct macb_context *ctx)
 {
+	struct device *dev = &ctx->info->pdev->dev;
 	struct macb_dma_desc *desc;
-	struct macb_queue *queue;
 	struct macb_rxq *rxq;
 	struct sk_buff *skb;
 	dma_addr_t addr;
 	unsigned int q;
 	int i;
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		rxq = &bp->ctx->rxq[q];
+	for (q = 0; q < ctx->info->num_queues; ++q) {
+		rxq = &ctx->rxq[q];
 
 		if (!rxq->skbuff)
 			continue;
 
-		for (i = 0; i < bp->ctx->rx_ring_size; i++) {
+		for (i = 0; i < ctx->rx_ring_size; i++) {
 			skb = rxq->skbuff[i];
 
 			if (!skb)
 				continue;
 
-			desc = macb_rx_desc(queue, i);
-			addr = macb_get_addr(bp->caps, desc);
+			desc = macb_rx_desc(ctx, q, i);
+			addr = macb_get_addr(ctx, desc);
 
-			dma_unmap_single(&bp->pdev->dev, addr,
-					 bp->ctx->rx_buffer_size,
+			dma_unmap_single(dev, addr, ctx->rx_buffer_size,
 					 DMA_FROM_DEVICE);
 			dev_kfree_skb_any(skb);
 			skb = NULL;
@@ -2675,52 +2697,52 @@ static void gem_free_rx_buffers(struct macb *bp)
 	}
 }
 
-static void macb_free_rx_buffers(struct macb *bp)
+static void macb_free_rx_buffers(struct macb_context *ctx)
 {
-	struct macb_rxq *rxq = &bp->ctx->rxq[0];
+	struct device *dev = &ctx->info->pdev->dev;
+	struct macb_rxq *rxq = &ctx->rxq[0];
 
 	if (rxq->buffers) {
-		dma_free_coherent(&bp->pdev->dev,
-				  bp->ctx->rx_ring_size *
-					bp->ctx->rx_buffer_size,
+		dma_free_coherent(dev,
+				  ctx->rx_ring_size * ctx->rx_buffer_size,
 				  rxq->buffers, rxq->buffers_dma);
 		rxq->buffers = NULL;
 	}
 }
 
-static unsigned int macb_tx_ring_size_per_queue(struct macb *bp)
+static unsigned int macb_tx_ring_size_per_queue(struct macb_context *ctx)
 {
-	return macb_dma_desc_get_size(bp->caps) * bp->ctx->tx_ring_size +
-		bp->tx_bd_rd_prefetch;
+	return macb_dma_desc_get_size(ctx->info->caps) * ctx->tx_ring_size +
+			ctx->info->tx_bd_rd_prefetch;
 }
 
-static unsigned int macb_rx_ring_size_per_queue(struct macb *bp)
+static unsigned int macb_rx_ring_size_per_queue(struct macb_context *ctx)
 {
-	return macb_dma_desc_get_size(bp->caps) * bp->ctx->rx_ring_size +
-		bp->rx_bd_rd_prefetch;
+	return macb_dma_desc_get_size(ctx->info->caps) * ctx->rx_ring_size +
+			ctx->info->rx_bd_rd_prefetch;
 }
 
-static void macb_free_consistent(struct macb *bp)
+static void macb_free_consistent(struct macb_context *ctx)
 {
-	struct device *dev = &bp->pdev->dev;
+	struct device *dev = &ctx->info->pdev->dev;
 	struct macb_txq *txq;
 	struct macb_rxq *rxq;
 	unsigned int q;
 	size_t size;
 
-	bp->macbgem_ops.mog_free_rx_buffers(bp);
+	ctx->info->macbgem_ops.mog_free_rx_buffers(ctx);
 
-	txq = &bp->ctx->txq[0];
-	size = bp->num_queues * macb_tx_ring_size_per_queue(bp);
+	txq = &ctx->txq[0];
+	size = ctx->info->num_queues * macb_tx_ring_size_per_queue(ctx);
 	dma_free_coherent(dev, size, txq->ring, txq->ring_dma);
 
-	rxq = &bp->ctx->rxq[0];
-	size = bp->num_queues * macb_rx_ring_size_per_queue(bp);
+	rxq = &ctx->rxq[0];
+	size = ctx->info->num_queues * macb_rx_ring_size_per_queue(ctx);
 	dma_free_coherent(dev, size, rxq->ring, rxq->ring_dma);
 
-	for (q = 0; q < bp->num_queues; ++q) {
-		txq = &bp->ctx->txq[q];
-		rxq = &bp->ctx->rxq[q];
+	for (q = 0; q < ctx->info->num_queues; ++q) {
+		txq = &ctx->txq[q];
+		rxq = &ctx->rxq[q];
 
 		kfree(txq->skb);
 		txq->skb = NULL;
@@ -2729,46 +2751,48 @@ static void macb_free_consistent(struct macb *bp)
 	}
 }
 
-static int gem_alloc_rx_buffers(struct macb *bp)
+static int gem_alloc_rx_buffers(struct macb_context *ctx)
 {
 	struct macb_rxq *rxq;
 	unsigned int q;
 	int size;
 
-	for (q = 0; q < bp->num_queues; ++q) {
-		rxq = &bp->ctx->rxq[q];
-		size = bp->ctx->rx_ring_size * sizeof(struct sk_buff *);
+	for (q = 0; q < ctx->info->num_queues; ++q) {
+		rxq = &ctx->rxq[q];
+		size = ctx->rx_ring_size * sizeof(struct sk_buff *);
 		rxq->skbuff = kzalloc(size, GFP_KERNEL);
 		if (!rxq->skbuff)
 			return -ENOMEM;
 		else
-			netdev_dbg(bp->netdev,
+			netdev_dbg(ctx->info->netdev,
 				   "Allocated %d RX struct sk_buff entries at %p\n",
-				   bp->ctx->rx_ring_size, rxq->skbuff);
+				   ctx->rx_ring_size, rxq->skbuff);
 	}
 	return 0;
 }
 
-static int macb_alloc_rx_buffers(struct macb *bp)
+static int macb_alloc_rx_buffers(struct macb_context *ctx)
 {
-	struct macb_rxq *rxq = &bp->ctx->rxq[0];
+	struct device *dev = &ctx->info->pdev->dev;
+	struct macb_rxq *rxq = &ctx->rxq[0];
 	int size;
 
-	size = bp->ctx->rx_ring_size * bp->ctx->rx_buffer_size;
-	rxq->buffers = dma_alloc_coherent(&bp->pdev->dev, size,
+	size = ctx->rx_ring_size * ctx->rx_buffer_size;
+	rxq->buffers = dma_alloc_coherent(dev, size,
 					  &rxq->buffers_dma, GFP_KERNEL);
 	if (!rxq->buffers)
 		return -ENOMEM;
 
-	netdev_dbg(bp->netdev,
+	netdev_dbg(ctx->info->netdev,
 		   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
 		   size, (unsigned long)rxq->buffers_dma, rxq->buffers);
 	return 0;
 }
 
-static int macb_alloc_consistent(struct macb *bp)
+static int macb_alloc_consistent(struct macb_context *ctx)
 {
-	struct device *dev = &bp->pdev->dev;
+	unsigned int num_queues = ctx->info->num_queues;
+	struct device *dev = &ctx->info->pdev->dev;
 	dma_addr_t tx_dma, rx_dma;
 	struct macb_txq *txq;
 	struct macb_rxq *rxq;
@@ -2783,89 +2807,90 @@ static int macb_alloc_consistent(struct macb *bp)
 	 * natural alignment of physical addresses.
 	 */
 
-	size = bp->num_queues * macb_tx_ring_size_per_queue(bp);
+	size = num_queues * macb_tx_ring_size_per_queue(ctx);
 	tx = dma_alloc_coherent(dev, size, &tx_dma, GFP_KERNEL);
 	if (!tx || upper_32_bits(tx_dma) != upper_32_bits(tx_dma + size - 1))
 		goto out_err;
-	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n",
-		   size, bp->num_queues, (unsigned long)tx_dma, tx);
+	netdev_dbg(ctx->info->netdev,
+		   "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n",
+		   size, num_queues, (unsigned long)tx_dma, tx);
 
-	size = bp->num_queues * macb_rx_ring_size_per_queue(bp);
+	size = num_queues * macb_rx_ring_size_per_queue(ctx);
 	rx = dma_alloc_coherent(dev, size, &rx_dma, GFP_KERNEL);
 	if (!rx || upper_32_bits(rx_dma) != upper_32_bits(rx_dma + size - 1))
 		goto out_err;
-	netdev_dbg(bp->netdev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
-		   size, bp->num_queues, (unsigned long)rx_dma, rx);
+	netdev_dbg(ctx->info->netdev,
+		   "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
+		   size, num_queues, (unsigned long)rx_dma, rx);
 
-	for (q = 0; q < bp->num_queues; ++q) {
-		txq = &bp->ctx->txq[q];
-		rxq = &bp->ctx->rxq[q];
+	for (q = 0; q < num_queues; ++q) {
+		txq = &ctx->txq[q];
+		rxq = &ctx->rxq[q];
 
-		txq->ring = tx + macb_tx_ring_size_per_queue(bp) * q;
-		txq->ring_dma = tx_dma + macb_tx_ring_size_per_queue(bp) * q;
+		txq->ring = tx + macb_tx_ring_size_per_queue(ctx) * q;
+		txq->ring_dma = tx_dma + macb_tx_ring_size_per_queue(ctx) * q;
 
-		rxq->ring = rx + macb_rx_ring_size_per_queue(bp) * q;
-		rxq->ring_dma = rx_dma + macb_rx_ring_size_per_queue(bp) * q;
+		rxq->ring = rx + macb_rx_ring_size_per_queue(ctx) * q;
+		rxq->ring_dma = rx_dma + macb_rx_ring_size_per_queue(ctx) * q;
 
-		size = bp->ctx->tx_ring_size * sizeof(struct macb_tx_skb);
+		size = ctx->tx_ring_size * sizeof(struct macb_tx_skb);
 		txq->skb = kmalloc(size, GFP_KERNEL);
 		if (!txq->skb)
 			goto out_err;
 	}
-	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
+	if (ctx->info->macbgem_ops.mog_alloc_rx_buffers(ctx))
 		goto out_err;
 
 	return 0;
 
 out_err:
-	macb_free_consistent(bp);
+	macb_free_consistent(ctx);
 	return -ENOMEM;
 }
 
-static void gem_init_rx_ring(struct macb_queue *queue)
+static void gem_init_rx_ring(struct macb_context *ctx, unsigned int q)
 {
-	struct macb_rxq *rxq = macb_rxq(queue);
+	struct macb_rxq *rxq = &ctx->rxq[q];
 
 	rxq->tail = 0;
 	rxq->prepared_head = 0;
 
-	gem_rx_refill(queue);
+	gem_rx_refill(ctx, q);
 }
 
-static void gem_init_rings(struct macb *bp)
+static void gem_init_rings(struct macb_context *ctx)
 {
-	struct macb_queue *queue;
 	struct macb_dma_desc *desc = NULL;
 	struct macb_txq *txq;
 	unsigned int q;
 	int i;
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		txq = &bp->ctx->txq[q];
-		for (i = 0; i < bp->ctx->tx_ring_size; i++) {
-			desc = macb_tx_desc(queue, i);
-			macb_set_addr(bp->caps, desc, 0);
+	for (q = 0; q < ctx->info->num_queues; ++q) {
+		txq = &ctx->txq[q];
+		for (i = 0; i < ctx->tx_ring_size; i++) {
+			desc = macb_tx_desc(ctx, q, i);
+			macb_set_addr(ctx->info->caps, desc, 0);
 			desc->ctrl = MACB_BIT(TX_USED);
 		}
 		desc->ctrl |= MACB_BIT(TX_WRAP);
 		txq->head = 0;
 		txq->tail = 0;
 
-		gem_init_rx_ring(queue);
+		gem_init_rx_ring(ctx, q);
 	}
 }
 
-static void macb_init_rings(struct macb *bp)
+static void macb_init_rings(struct macb_context *ctx)
 {
-	struct macb_txq *txq = &bp->ctx->txq[0];
+	struct macb_txq *txq = &ctx->txq[0];
 	struct macb_dma_desc *desc = NULL;
 	int i;
 
-	macb_init_rx_ring(&bp->queues[0]);
+	macb_init_rx_ring(ctx, 0);
 
-	for (i = 0; i < bp->ctx->tx_ring_size; i++) {
-		desc = macb_tx_desc(&bp->queues[0], i);
-		macb_set_addr(bp->caps, desc, 0);
+	for (i = 0; i < ctx->tx_ring_size; i++) {
+		desc = macb_tx_desc(ctx, 0, i);
+		macb_set_addr(ctx->info->caps, desc, 0);
 		desc->ctrl = MACB_BIT(TX_USED);
 	}
 	txq->head = 0;
@@ -3203,14 +3228,14 @@ static int macb_open(struct net_device *netdev)
 	bp->ctx->rx_ring_size = bp->configured_rx_ring_size;
 	bp->ctx->tx_ring_size = bp->configured_tx_ring_size;
 
-	err = macb_alloc_consistent(bp);
+	err = macb_alloc_consistent(bp->ctx);
 	if (err) {
 		netdev_err(netdev, "Unable to allocate DMA memory (error %d)\n",
 			   err);
 		goto free_ctx;
 	}
 
-	bp->macbgem_ops.mog_init_rings(bp);
+	bp->macbgem_ops.mog_init_rings(bp->ctx);
 	macb_init_buffers(bp);
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
@@ -3248,7 +3273,7 @@ static int macb_open(struct net_device *netdev)
 		napi_disable(&queue->napi_rx);
 		napi_disable(&queue->napi_tx);
 	}
-	macb_free_consistent(bp);
+	macb_free_consistent(bp->ctx);
 free_ctx:
 	kfree(bp->ctx);
 	bp->ctx = NULL;
@@ -3284,7 +3309,7 @@ static int macb_close(struct net_device *netdev)
 	netif_carrier_off(netdev);
 	spin_unlock_irqrestore(&bp->lock, flags);
 
-	macb_free_consistent(bp);
+	macb_free_consistent(bp->ctx);
 	kfree(bp->ctx);
 	bp->ctx = NULL;
 
@@ -3663,8 +3688,8 @@ static void macb_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
 
 	if (bp->ctx) {
 		txq = &bp->ctx->txq[0];
-		tail = macb_tx_ring_wrap(bp, txq->tail);
-		head = macb_tx_ring_wrap(bp, txq->head);
+		tail = macb_tx_ring_wrap(bp->ctx, txq->tail);
+		head = macb_tx_ring_wrap(bp->ctx, txq->head);
 		tx_dma_tail = macb_tx_dma(&bp->queues[0], tail);
 		tx_dma_head = macb_tx_dma(&bp->queues[0], head);
 	}
@@ -4998,7 +5023,7 @@ static int at91ether_alloc_coherent(struct macb *bp)
 
 	rxq->ring = dma_alloc_coherent(&bp->pdev->dev,
 				       (AT91ETHER_MAX_RX_DESCR *
-					macb_dma_desc_get_size(bp->caps)),
+				       macb_dma_desc_get_size(bp->caps)),
 				       &rxq->ring_dma, GFP_KERNEL);
 	if (!rxq->ring)
 		return -ENOMEM;
@@ -5044,7 +5069,6 @@ static void at91ether_free_coherent(struct macb *bp)
 /* Initialize and start the Receiver and Transmit subsystems */
 static int at91ether_start(struct macb *bp)
 {
-	struct macb_queue *queue = &bp->queues[0];
 	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 	struct macb_dma_desc *desc;
 	dma_addr_t addr;
@@ -5057,7 +5081,7 @@ static int at91ether_start(struct macb *bp)
 
 	addr = rxq->buffers_dma;
 	for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) {
-		desc = macb_rx_desc(queue, i);
+		desc = macb_rx_desc(bp->ctx, 0, i);
 		macb_set_addr(bp->caps, desc, addr);
 		desc->ctrl = 0;
 		addr += AT91ETHER_MAX_RBUFF_SZ;
@@ -5207,14 +5231,13 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
 static void at91ether_rx(struct net_device *netdev)
 {
 	struct macb *bp = netdev_priv(netdev);
-	struct macb_queue *queue = &bp->queues[0];
 	struct macb_rxq *rxq = &bp->ctx->rxq[0];
 	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
 	struct sk_buff *skb;
 	unsigned int pktlen;
 
-	desc = macb_rx_desc(queue, rxq->tail);
+	desc = macb_rx_desc(bp->ctx, 0, rxq->tail);
 	while (desc->addr & MACB_BIT(RX_USED)) {
 		p_recv = rxq->buffers + rxq->tail * AT91ETHER_MAX_RBUFF_SZ;
 		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
@@ -5243,7 +5266,7 @@ static void at91ether_rx(struct net_device *netdev)
 		else
 			rxq->tail++;
 
-		desc = macb_rx_desc(queue, rxq->tail);
+		desc = macb_rx_desc(bp->ctx, 0, rxq->tail);
 	}
 }
 
@@ -6197,6 +6220,7 @@ static int __maybe_unused macb_resume(struct device *dev)
 {
 	struct net_device *netdev = dev_get_drvdata(dev);
 	struct macb *bp = netdev_priv(netdev);
+	struct macb_context *ctx = bp->ctx;
 	struct macb_queue *queue;
 	unsigned long flags;
 	unsigned int q;
@@ -6242,9 +6266,9 @@ static int __maybe_unused macb_resume(struct device *dev)
 	     ++q, ++queue) {
 		if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) {
 			if (macb_is_gem(bp->caps))
-				gem_init_rx_ring(queue);
+				gem_init_rx_ring(ctx, q);
 			else
-				macb_init_rx_ring(queue);
+				macb_init_rx_ring(ctx, q);
 		}
 
 		napi_enable(&queue->napi_rx);

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 11/14] net: macb: introduce macb_context_alloc() helper
From: Théo Lebrun @ 2026-04-10 19:51 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

Move the context allocation sequence, previously open-coded in
macb_open(), to its own helper function called macb_context_alloc().
All ops doing context swapping (set_ringparam, change_mtu, etc.) will
use this helper.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 55 +++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 71d60d8d1993..ba7463a857dd 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2848,6 +2848,36 @@ static int macb_alloc_consistent(struct macb_context *ctx)
 	return -ENOMEM;
 }
 
+static struct macb_context *macb_context_alloc(struct macb *bp,
+					       unsigned int mtu,
+					       unsigned int rx_ring_size,
+					       unsigned int tx_ring_size)
+{
+	struct macb_context *ctx;
+	int err;
+
+	ctx = kzalloc_obj(*ctx);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	ctx->info = &bp->info;
+	ctx->rx_buffer_size = macb_rx_buffer_size(bp, mtu);
+	ctx->rx_ring_size = rx_ring_size;
+	ctx->tx_ring_size = tx_ring_size;
+
+	err = macb_alloc_consistent(ctx);
+	if (err) {
+		netdev_err(bp->netdev,
+			   "Unable to allocate DMA memory (error %d)\n", err);
+		kfree(ctx);
+		return ERR_PTR(err);
+	}
+
+	bp->macbgem_ops.mog_init_rings(ctx);
+
+	return ctx;
+}
+
 static void gem_init_rx_ring(struct macb_context *ctx, unsigned int q)
 {
 	struct macb_rxq *rxq = &ctx->rxq[q];
@@ -3215,27 +3245,15 @@ static int macb_open(struct net_device *netdev)
 	if (err < 0)
 		return err;
 
-	bp->ctx = kzalloc_obj(*bp->ctx);
-	if (!bp->ctx) {
-		err = -ENOMEM;
+	bp->ctx = macb_context_alloc(bp, netdev->mtu,
+				     bp->configured_rx_ring_size,
+				     bp->configured_tx_ring_size);
+	if (IS_ERR(bp->ctx)) {
+		err = PTR_ERR(bp->ctx);
+		bp->ctx = NULL;
 		goto pm_exit;
 	}
 
-	bp->ctx->info = &bp->info;
-
-	/* RX buffers initialization */
-	bp->ctx->rx_buffer_size = macb_rx_buffer_size(bp, netdev->mtu);
-	bp->ctx->rx_ring_size = bp->configured_rx_ring_size;
-	bp->ctx->tx_ring_size = bp->configured_tx_ring_size;
-
-	err = macb_alloc_consistent(bp->ctx);
-	if (err) {
-		netdev_err(netdev, "Unable to allocate DMA memory (error %d)\n",
-			   err);
-		goto free_ctx;
-	}
-
-	bp->macbgem_ops.mog_init_rings(bp->ctx);
 	macb_init_buffers(bp);
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
@@ -3274,7 +3292,6 @@ static int macb_open(struct net_device *netdev)
 		napi_disable(&queue->napi_tx);
 	}
 	macb_free_consistent(bp->ctx);
-free_ctx:
 	kfree(bp->ctx);
 	bp->ctx = NULL;
 pm_exit:

-- 
2.53.0


^ permalink raw reply related

* [PATCH net-next v2 12/14] net: macb: re-read ISR inside IRQ handler locked section
From: Théo Lebrun @ 2026-04-10 19:52 UTC (permalink / raw)
  To: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Richard Cochran,
	Russell King
  Cc: Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel, Théo Lebrun
In-Reply-To: <20260410-macb-context-v2-0-af39f71d40b6@bootlin.com>

The IRQ handler reads the ISR register into the `status` stack variable.
If it is zero, the handler returns early. Otherwise, it grabs bp->lock
and iterates over the status bits.

If bp->lock was already held when we tried to grab it, we might have
waited for it and the status might have been updated in the meantime.
Our most likely competitor in this race is a context swap operation,
used in change_mtu and set_ringparam. It is the only MACB codepath that
resets interrupts and HW inside a bp->lock critical section. Other
codepaths that clear HW IRQ status do so outside the bp->lock critical
section.

Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index ba7463a857dd..81beb67b206a 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2190,6 +2190,13 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 
 	spin_lock(&bp->lock);
 
+	/* `status` stack variable might be stale => re-read it */
+	status = queue_readl(queue, ISR);
+	if (unlikely(!status)) {
+		spin_unlock(&bp->lock);
+		return IRQ_NONE;
+	}
+
 	while (status) {
 		/* close possible race with dev_close */
 		if (unlikely(!netif_running(netdev))) {

-- 
2.53.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox