Netdev List
 help / color / mirror / Atom feed
* [PATCH v1 net 06/15] bpf: Fix data-races around bpf_jit_kallsyms.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel,
	Daniel Borkmann
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading bpf_jit_kallsyms, it can be changed concurrently.
Thus, we need to add READ_ONCE() to its readers.

Fixes: 74451e66d516 ("bpf: make jited programs visible in traces")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
CC: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 09566ad211bd..35881fccce05 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1110,14 +1110,16 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog)
 
 static inline bool bpf_jit_kallsyms_enabled(void)
 {
+	int jit_kallsyms = READ_ONCE(bpf_jit_kallsyms);
+
 	/* There are a couple of corner cases where kallsyms should
 	 * not be enabled f.e. on hardening.
 	 */
 	if (READ_ONCE(bpf_jit_harden))
 		return false;
-	if (!bpf_jit_kallsyms)
+	if (!jit_kallsyms)
 		return false;
-	if (bpf_jit_kallsyms == 1)
+	if (jit_kallsyms == 1)
 		return true;
 
 	return false;
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 04/15] bpf: Fix data-races around bpf_jit_enable.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

The sysctl variable bpf_jit_enable is accessed concurrently, so there is
always a chance of a data race.  All readers and the writer therefore
need some basic protection to avoid load/store tearing.

Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 arch/arm/net/bpf_jit_32.c        | 2 +-
 arch/arm64/net/bpf_jit_comp.c    | 2 +-
 arch/mips/net/bpf_jit_comp.c     | 2 +-
 arch/powerpc/net/bpf_jit_comp.c  | 5 +++--
 arch/riscv/net/bpf_jit_core.c    | 2 +-
 arch/s390/net/bpf_jit_comp.c     | 2 +-
 arch/sparc/net/bpf_jit_comp_32.c | 5 +++--
 arch/sparc/net/bpf_jit_comp_64.c | 5 +++--
 arch/x86/net/bpf_jit_comp.c      | 2 +-
 arch/x86/net/bpf_jit_comp32.c    | 2 +-
 include/linux/filter.h           | 2 +-
 net/core/sysctl_net_core.c       | 4 ++--
 12 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6a1c9fca5260..4b6b62a6fdd4 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1999,7 +1999,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	}
 	flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
 
-	if (bpf_jit_enable > 1)
+	if (READ_ONCE(bpf_jit_enable) > 1)
 		/* there are 2 passes here */
 		bpf_jit_dump(prog->len, image_size, 2, ctx.target);
 
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 389623ae5a91..03bb40352d2c 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1568,7 +1568,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	}
 
 	/* And we're done. */
-	if (bpf_jit_enable > 1)
+	if (READ_ONCE(bpf_jit_enable) > 1)
 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
 
 	bpf_flush_icache(header, ctx.image + ctx.idx);
diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c
index b17130d510d4..1e623ae7eadf 100644
--- a/arch/mips/net/bpf_jit_comp.c
+++ b/arch/mips/net/bpf_jit_comp.c
@@ -1012,7 +1012,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	flush_icache_range((unsigned long)header,
 			   (unsigned long)&ctx.target[ctx.jit_index]);
 
-	if (bpf_jit_enable > 1)
+	if (READ_ONCE(bpf_jit_enable) > 1)
 		bpf_jit_dump(prog->len, image_size, 2, ctx.target);
 
 	prog->bpf_func = (void *)ctx.target;
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 43e634126514..c71d1e94ee7e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -122,6 +122,7 @@ bool bpf_jit_needs_zext(void)
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
+	int jit_enable = READ_ONCE(bpf_jit_enable);
 	u32 proglen;
 	u32 alloclen;
 	u8 *image = NULL;
@@ -263,13 +264,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		}
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
-		if (bpf_jit_enable > 1)
+		if (jit_enable > 1)
 			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
 				proglen - (cgctx.idx * 4), cgctx.seen);
 	}
 
 skip_codegen_passes:
-	if (bpf_jit_enable > 1)
+	if (jit_enable > 1)
 		/*
 		 * Note that we output the base address of the code_base
 		 * rather than image, since opcodes are in code_base.
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 737baf8715da..603b5b66379b 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -151,7 +151,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	}
 	bpf_jit_build_epilogue(ctx);
 
-	if (bpf_jit_enable > 1)
+	if (READ_ONCE(bpf_jit_enable) > 1)
 		bpf_jit_dump(prog->len, prog_size, pass, ctx->insns);
 
 	prog->bpf_func = (void *)ctx->insns;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index af35052d06ed..06897a4e9c62 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1831,7 +1831,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		fp = orig_fp;
 		goto free_addrs;
 	}
-	if (bpf_jit_enable > 1) {
+	if (READ_ONCE(bpf_jit_enable) > 1) {
 		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
 		print_fn_code(jit.prg_buf, jit.size_prg);
 	}
diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
index b1dbf2fa8c0a..7c454b920250 100644
--- a/arch/sparc/net/bpf_jit_comp_32.c
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -326,13 +326,14 @@ do {	*prog++ = BR_OPC | WDISP22(OFF);		\
 void bpf_jit_compile(struct bpf_prog *fp)
 {
 	unsigned int cleanup_addr, proglen, oldproglen = 0;
+	int jit_enable = READ_ONCE(bpf_jit_enable);
 	u32 temp[8], *prog, *func, seen = 0, pass;
 	const struct sock_filter *filter = fp->insns;
 	int i, flen = fp->len, pc_ret0 = -1;
 	unsigned int *addrs;
 	void *image;
 
-	if (!bpf_jit_enable)
+	if (!jit_enable)
 		return;
 
 	addrs = kmalloc_array(flen, sizeof(*addrs), GFP_KERNEL);
@@ -743,7 +744,7 @@ cond_branch:			f_offset = addrs[i + filter[i].jf];
 		oldproglen = proglen;
 	}
 
-	if (bpf_jit_enable > 1)
+	if (jit_enable > 1)
 		bpf_jit_dump(flen, proglen, pass + 1, image);
 
 	if (image) {
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index fa0759bfe498..74cc1fa1f97f 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1479,6 +1479,7 @@ struct sparc64_jit_data {
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
+	int jit_enable = READ_ONCE(bpf_jit_enable);
 	struct bpf_prog *tmp, *orig_prog = prog;
 	struct sparc64_jit_data *jit_data;
 	struct bpf_binary_header *header;
@@ -1549,7 +1550,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		}
 		build_epilogue(&ctx);
 
-		if (bpf_jit_enable > 1)
+		if (jit_enable > 1)
 			pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass,
 				ctx.idx * 4,
 				ctx.tmp_1_used ? '1' : ' ',
@@ -1596,7 +1597,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		goto out_off;
 	}
 
-	if (bpf_jit_enable > 1)
+	if (jit_enable > 1)
 		bpf_jit_dump(prog->len, image_size, pass, ctx.image);
 
 	bpf_flush_icache(header, (u8 *)header + header->size);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c1f6c1c51d99..a5c7df7cab2a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2439,7 +2439,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		cond_resched();
 	}
 
-	if (bpf_jit_enable > 1)
+	if (READ_ONCE(bpf_jit_enable) > 1)
 		bpf_jit_dump(prog->len, proglen, pass + 1, image);
 
 	if (image) {
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 429a89c5468b..745f15a29dd3 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2597,7 +2597,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		cond_resched();
 	}
 
-	if (bpf_jit_enable > 1)
+	if (READ_ONCE(bpf_jit_enable) > 1)
 		bpf_jit_dump(prog->len, proglen, pass + 1, image);
 
 	if (image) {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a5f21dc3c432..ce8072626ccf 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1080,7 +1080,7 @@ static inline bool bpf_jit_is_ebpf(void)
 
 static inline bool ebpf_jit_enabled(void)
 {
-	return bpf_jit_enable && bpf_jit_is_ebpf();
+	return READ_ONCE(bpf_jit_enable) && bpf_jit_is_ebpf();
 }
 
 static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d82ba0c27175..022abf326dfe 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -265,7 +265,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
 					   void *buffer, size_t *lenp,
 					   loff_t *ppos)
 {
-	int ret, jit_enable = *(int *)table->data;
+	int ret, jit_enable = READ_ONCE(*(int *)table->data);
 	int min = *(int *)table->extra1;
 	int max = *(int *)table->extra2;
 	struct ctl_table tmp = *table;
@@ -278,7 +278,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
 	if (write && !ret) {
 		if (jit_enable < 2 ||
 		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
-			*(int *)table->data = jit_enable;
+			WRITE_ONCE(*(int *)table->data, jit_enable);
 			if (jit_enable == 2)
 				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
 		} else {
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 07/15] bpf: Fix a data-race around bpf_jit_limit.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel,
	Daniel Borkmann
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading bpf_jit_limit, it can be changed concurrently.
Thus, we need to add READ_ONCE() to its reader.

Fixes: ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
CC: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c1e10d088dbb..3d9eb3ae334c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -971,7 +971,7 @@ pure_initcall(bpf_jit_charge_init);
 
 int bpf_jit_charge_modmem(u32 size)
 {
-	if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) {
+	if (atomic_long_add_return(size, &bpf_jit_current) > READ_ONCE(bpf_jit_limit)) {
 		if (!bpf_capable()) {
 			atomic_long_sub(size, &bpf_jit_current);
 			return -EPERM;
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 09/15] ratelimit: Fix data-races in ___ratelimit().
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading rs->interval and rs->burst, they can be changed
concurrently.  Thus, we need to add READ_ONCE() to their readers.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 lib/ratelimit.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index e01a93f46f83..b59a1d3d0cc3 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -26,10 +26,12 @@
  */
 int ___ratelimit(struct ratelimit_state *rs, const char *func)
 {
+	int interval = READ_ONCE(rs->interval);
+	int burst = READ_ONCE(rs->burst);
 	unsigned long flags;
 	int ret;
 
-	if (!rs->interval)
+	if (!interval)
 		return 1;
 
 	/*
@@ -44,7 +46,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
 	if (!rs->begin)
 		rs->begin = jiffies;
 
-	if (time_is_before_jiffies(rs->begin + rs->interval)) {
+	if (time_is_before_jiffies(rs->begin + interval)) {
 		if (rs->missed) {
 			if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
 				printk_deferred(KERN_WARNING
@@ -56,7 +58,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
 		rs->begin   = jiffies;
 		rs->printed = 0;
 	}
-	if (rs->burst && rs->burst > rs->printed) {
+	if (burst && burst > rs->printed) {
 		rs->printed++;
 		ret = 1;
 	} else {
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 08/15] net: Fix data-races around netdev_tstamp_prequeue.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading netdev_tstamp_prequeue, it can be changed concurrently.
Thus, we need to add READ_ONCE() to its readers.

Fixes: 3b098e2d7c69 ("net: Consistent skb timestamping")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 net/core/dev.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 07da69c1ac0a..4705e6630efa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4928,7 +4928,7 @@ static int netif_rx_internal(struct sk_buff *skb)
 {
 	int ret;
 
-	net_timestamp_check(netdev_tstamp_prequeue, skb);
+	net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
 
 	trace_netif_rx(skb);
 
@@ -5281,7 +5281,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	net_timestamp_check(!netdev_tstamp_prequeue, skb);
+	net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
 
 	trace_netif_receive_skb(skb);
 
@@ -5664,7 +5664,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 {
 	int ret;
 
-	net_timestamp_check(netdev_tstamp_prequeue, skb);
+	net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
 
 	if (skb_defer_rx_timestamp(skb))
 		return NET_RX_SUCCESS;
@@ -5694,7 +5694,7 @@ void netif_receive_skb_list_internal(struct list_head *head)
 
 	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
-		net_timestamp_check(netdev_tstamp_prequeue, skb);
+		net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
 		skb_list_del_init(skb);
 		if (!skb_defer_rx_timestamp(skb))
 			list_add_tail(&skb->list, &sublist);
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 11/15] net: Fix a data-race around sysctl_tstamp_allow_data.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel,
	Willem de Bruijn
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading sysctl_tstamp_allow_data, it can be changed
concurrently.  Thus, we need to add READ_ONCE() to its reader.

Fixes: b245be1f4db1 ("net-timestamp: no-payload only sysctl")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
CC: Willem de Bruijn <willemb@google.com>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 974bbbbe7138..174f34124c06 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4798,7 +4798,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
 {
 	bool ret;
 
-	if (likely(sysctl_tstamp_allow_data || tsonly))
+	if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
 		return true;
 
 	read_lock_bh(&sk->sk_callback_lock);
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 10/15] net: Fix data-races around sysctl_optmem_max.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading sysctl_optmem_max, it can be changed concurrently.
Thus, we need to add READ_ONCE() to its readers.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 net/core/bpf_sk_storage.c | 5 +++--
 net/core/filter.c         | 9 +++++----
 net/core/sock.c           | 8 +++++---
 net/ipv4/ip_sockglue.c    | 6 +++---
 net/ipv6/ipv6_sockglue.c  | 4 ++--
 5 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 1b7f385643b4..94374d529ea4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -310,11 +310,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
 static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
 				 void *owner, u32 size)
 {
+	int optmem_max = READ_ONCE(sysctl_optmem_max);
 	struct sock *sk = (struct sock *)owner;
 
 	/* same check as in sock_kmalloc() */
-	if (size <= sysctl_optmem_max &&
-	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+	if (size <= optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
 		atomic_add(size, &sk->sk_omem_alloc);
 		return 0;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index c4f14ad82029..c191db80ce93 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1214,10 +1214,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
 static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 {
 	u32 filter_size = bpf_prog_size(fp->prog->len);
+	int optmem_max = READ_ONCE(sysctl_optmem_max);
 
 	/* same check as in sock_kmalloc() */
-	if (filter_size <= sysctl_optmem_max &&
-	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
+	if (filter_size <= optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) {
 		atomic_add(filter_size, &sk->sk_omem_alloc);
 		return true;
 	}
@@ -1548,7 +1549,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+	if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
 		err = -ENOMEM;
 	else
 		err = reuseport_attach_prog(sk, prog);
@@ -1615,7 +1616,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
 		}
 	} else {
 		/* BPF_PROG_TYPE_SOCKET_FILTER */
-		if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+		if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
 			err = -ENOMEM;
 			goto err_prog_put;
 		}
diff --git a/net/core/sock.c b/net/core/sock.c
index 303af52f3b79..95abf4604d88 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2536,7 +2536,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
 
 	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
 	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
-	    sysctl_optmem_max)
+	    READ_ONCE(sysctl_optmem_max))
 		return NULL;
 
 	skb = alloc_skb(size, priority);
@@ -2554,8 +2554,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
  */
 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
 {
-	if ((unsigned int)size <= sysctl_optmem_max &&
-	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+	int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+	if ((unsigned int)size <= optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
 		void *mem;
 		/* First do the add, to avoid the race if kmalloc
 		 * might sleep.
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a8a323ecbb54..e49a61a053a6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -772,7 +772,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
 
 	if (optlen < GROUP_FILTER_SIZE(0))
 		return -EINVAL;
-	if (optlen > sysctl_optmem_max)
+	if (optlen > READ_ONCE(sysctl_optmem_max))
 		return -ENOBUFS;
 
 	gsf = memdup_sockptr(optval, optlen);
@@ -808,7 +808,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
 
 	if (optlen < size0)
 		return -EINVAL;
-	if (optlen > sysctl_optmem_max - 4)
+	if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
 		return -ENOBUFS;
 
 	p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -1233,7 +1233,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
 
 		if (optlen < IP_MSFILTER_SIZE(0))
 			goto e_inval;
-		if (optlen > sysctl_optmem_max) {
+		if (optlen > READ_ONCE(sysctl_optmem_max)) {
 			err = -ENOBUFS;
 			break;
 		}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 222f6bf220ba..e0dcc7a193df 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
 
 	if (optlen < GROUP_FILTER_SIZE(0))
 		return -EINVAL;
-	if (optlen > sysctl_optmem_max)
+	if (optlen > READ_ONCE(sysctl_optmem_max))
 		return -ENOBUFS;
 
 	gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
 
 	if (optlen < size0)
 		return -EINVAL;
-	if (optlen > sysctl_optmem_max - 4)
+	if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
 		return -ENOBUFS;
 
 	p = kmalloc(optlen + 4, GFP_KERNEL);
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 12/15] net: Fix a data-race around sysctl_net_busy_poll.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel,
	Eliezer Tamir
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading sysctl_net_busy_poll, it can be changed concurrently.
Thus, we need to add READ_ONCE() to its reader.

Fixes: 060212928670 ("net: add low latency socket poll")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
CC: Eliezer Tamir <eliezer.tamir@linux.intel.com>
---
 include/net/busy_poll.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c4898fcbf923..f90f0021f5f2 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly;
 
 static inline bool net_busy_loop_on(void)
 {
-	return sysctl_net_busy_poll;
+	return READ_ONCE(sysctl_net_busy_poll);
 }
 
 static inline bool sk_can_busy_loop(const struct sock *sk)
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 13/15] net: Fix a data-race around sysctl_net_busy_read.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel,
	Eliezer Tamir
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading sysctl_net_busy_read, it can be changed concurrently.
Thus, we need to add READ_ONCE() to its reader.

Fixes: 2d48d67fa8cd ("net: poll/select low latency socket support")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
CC: Eliezer Tamir <eliezer.tamir@linux.intel.com>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 95abf4604d88..788c1372663c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3367,7 +3367,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id		=	0;
-	sk->sk_ll_usec		=	sysctl_net_busy_read;
+	sk->sk_ll_usec		=	READ_ONCE(sysctl_net_busy_read);
 #endif
 
 	sk->sk_max_pacing_rate = ~0UL;
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 14/15] net: Fix a data-race around netdev_budget.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel,
	Stephen Hemminger
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading netdev_budget, it can be changed concurrently.
Thus, we need to add READ_ONCE() to its reader.

Fixes: 51b0bdedb8e7 ("[NET]: Separate two usages of netdev_max_backlog.")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
CC: Stephen Hemminger <shemminger@osdl.org>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 4705e6630efa..c83e23cfc57d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6666,7 +6666,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
 	unsigned long time_limit = jiffies +
 		usecs_to_jiffies(netdev_budget_usecs);
-	int budget = netdev_budget;
+	int budget = READ_ONCE(netdev_budget);
 	LIST_HEAD(list);
 	LIST_HEAD(repoll);
 
-- 
2.30.2


^ permalink raw reply related

* [PATCH v1 net 15/15] net: Fix data-races around sysctl_max_skb_frags.
From: Kuniyuki Iwashima @ 2022-08-16  5:23 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, linux-kernel,
	Hans Westgaard Ry
In-Reply-To: <20220816052347.70042-1-kuniyu@amazon.com>

While reading sysctl_max_skb_frags, it can be changed concurrently.
Thus, we need to add READ_ONCE() to its readers.

Fixes: 5f74f82ea34c ("net:Add sysctl_max_skb_frags")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
CC: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
---
 net/ipv4/tcp.c       | 4 ++--
 net/mptcp/protocol.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 970e9a2cca4a..9a6fe3d6ab26 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1000,7 +1000,7 @@ static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
 
 	i = skb_shinfo(skb)->nr_frags;
 	can_coalesce = skb_can_coalesce(skb, i, page, offset);
-	if (!can_coalesce && i >= sysctl_max_skb_frags) {
+	if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
 		tcp_mark_push(tp, skb);
 		goto new_segment;
 	}
@@ -1354,7 +1354,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 			if (!skb_can_coalesce(skb, i, pfrag->page,
 					      pfrag->offset)) {
-				if (i >= sysctl_max_skb_frags) {
+				if (i >= READ_ONCE(sysctl_max_skb_frags)) {
 					tcp_mark_push(tp, skb);
 					goto new_segment;
 				}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index da4257504fad..d398f3810662 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1263,7 +1263,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 
 		i = skb_shinfo(skb)->nr_frags;
 		can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
-		if (!can_coalesce && i >= sysctl_max_skb_frags) {
+		if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
 			tcp_mark_push(tcp_sk(ssk), skb);
 			goto alloc_skb;
 		}
-- 
2.30.2


^ permalink raw reply related

* [PATCH v4 4/6] virtio_pci: Revert "virtio_pci: support the arg sizes of find_vqs()"
From: Michael S. Tsirkin @ 2022-08-16  5:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Xuan Zhuo, Jason Wang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, virtualization, netdev,
	Linus Torvalds, Jens Axboe, James Bottomley, Martin K. Petersen,
	Guenter Roeck, Greg KH, Andres Freund
In-Reply-To: <20220816053602.173815-1-mst@redhat.com>

This reverts commit cdb44806fca2d0ad29ca644cbf1505433902ee0c: the legacy
path is wrong and in fact cannot support the proposed API, since for a
legacy device we never communicate the vq size to the hypervisor.

Reported-by: Andres Freund <andres@anarazel.de>
Fixes: cdb44806fca2 ("virtio_pci: support the arg sizes of find_vqs()")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 18 ++++++++----------
 drivers/virtio/virtio_pci_common.h |  1 -
 drivers/virtio/virtio_pci_legacy.c |  6 +-----
 drivers/virtio/virtio_pci_modern.c | 10 +++-------
 4 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 00ad476a815d..7ad734584823 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -174,7 +174,6 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
 static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index,
 				     void (*callback)(struct virtqueue *vq),
 				     const char *name,
-				     u32 size,
 				     bool ctx,
 				     u16 msix_vec)
 {
@@ -187,7 +186,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
-	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, size, ctx,
+	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
 			      msix_vec);
 	if (IS_ERR(vq))
 		goto out_info;
@@ -284,7 +283,7 @@ void vp_del_vqs(struct virtio_device *vdev)
 
 static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], u32 sizes[], bool per_vq_vectors,
+		const char * const names[], bool per_vq_vectors,
 		const bool *ctx,
 		struct irq_affinity *desc)
 {
@@ -327,8 +326,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
 		else
 			msix_vec = VP_MSIX_VQ_VECTOR;
 		vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
-				     sizes ? sizes[i] : 0,
-				     ctx ? ctx[i] : false, msix_vec);
+				     ctx ? ctx[i] : false,
+				     msix_vec);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto error_find;
@@ -358,7 +357,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
 
 static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], u32 sizes[], const bool *ctx)
+		const char * const names[], const bool *ctx)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	int i, err, queue_idx = 0;
@@ -380,7 +379,6 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
 			continue;
 		}
 		vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
-				     sizes ? sizes[i] : 0,
 				     ctx ? ctx[i] : false,
 				     VIRTIO_MSI_NO_VECTOR);
 		if (IS_ERR(vqs[i])) {
@@ -404,15 +402,15 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 	int err;
 
 	/* Try MSI-X with one vector per queue. */
-	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, true, ctx, desc);
+	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
 	if (!err)
 		return 0;
 	/* Fallback: MSI-X with one vector for config, one shared for queues. */
-	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, false, ctx, desc);
+	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
 	if (!err)
 		return 0;
 	/* Finally fall back to regular interrupts. */
-	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, sizes, ctx);
+	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
 }
 
 const char *vp_bus_name(struct virtio_device *vdev)
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index c0448378b698..a5ff838b85a5 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -80,7 +80,6 @@ struct virtio_pci_device {
 				      unsigned int idx,
 				      void (*callback)(struct virtqueue *vq),
 				      const char *name,
-				      u32 size,
 				      bool ctx,
 				      u16 msix_vec);
 	void (*del_vq)(struct virtio_pci_vq_info *info);
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index d75e5c4e637f..2257f1b3d8ae 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -112,7 +112,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 				  unsigned int index,
 				  void (*callback)(struct virtqueue *vq),
 				  const char *name,
-				  u32 size,
 				  bool ctx,
 				  u16 msix_vec)
 {
@@ -126,13 +125,10 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index))
 		return ERR_PTR(-ENOENT);
 
-	if (!size || size > num)
-		size = num;
-
 	info->msix_vector = msix_vec;
 
 	/* create the vring */
-	vq = vring_create_virtqueue(index, size,
+	vq = vring_create_virtqueue(index, num,
 				    VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
 				    true, false, ctx,
 				    vp_notify, callback, name);
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index f7965c5dd36b..be51ec849252 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -293,7 +293,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 				  unsigned int index,
 				  void (*callback)(struct virtqueue *vq),
 				  const char *name,
-				  u32 size,
 				  bool ctx,
 				  u16 msix_vec)
 {
@@ -311,18 +310,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	if (!num || vp_modern_get_queue_enable(mdev, index))
 		return ERR_PTR(-ENOENT);
 
-	if (!size || size > num)
-		size = num;
-
-	if (size & (size - 1)) {
-		dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", size);
+	if (num & (num - 1)) {
+		dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
 		return ERR_PTR(-EINVAL);
 	}
 
 	info->msix_vector = msix_vec;
 
 	/* create the vring */
-	vq = vring_create_virtqueue(index, size,
+	vq = vring_create_virtqueue(index, num,
 				    SMP_CACHE_BYTES, &vp_dev->vdev,
 				    true, true, ctx,
 				    vp_notify, callback, name);
-- 
MST


^ permalink raw reply related

* [PATCH v4 2/6] virtio: Revert "virtio: add helper virtio_find_vqs_ctx_size()"
From: Michael S. Tsirkin @ 2022-08-16  5:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Xuan Zhuo, Jason Wang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, virtualization, netdev,
	Linus Torvalds, Jens Axboe, James Bottomley, Martin K. Petersen,
	Guenter Roeck, Greg KH
In-Reply-To: <20220816053602.173815-1-mst@redhat.com>

This reverts commit fe3dc04e31aa51f91dc7f741a5f76cc4817eb5b4: the
API is now unused and in fact can't be implemented on top of a legacy
device.

Fixes: fe3dc04e31aa ("virtio: add helper virtio_find_vqs_ctx_size()")
Cc: "Xuan Zhuo" <xuanzhuo@linux.alibaba.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 6adff09f7170..888f7e96f0c7 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -241,18 +241,6 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
 				      ctx, desc);
 }
 
-static inline
-int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs,
-			     struct virtqueue *vqs[],
-			     vq_callback_t *callbacks[],
-			     const char * const names[],
-			     u32 sizes[],
-			     const bool *ctx, struct irq_affinity *desc)
-{
-	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes,
-				      ctx, desc);
-}
-
 /**
  * virtio_synchronize_cbs - synchronize with virtqueue callbacks
  * @vdev: the device
-- 
MST


^ permalink raw reply related

* [PATCH v4 3/6] virtio-mmio: Revert "virtio_mmio: support the arg sizes of find_vqs()"
From: Michael S. Tsirkin @ 2022-08-16  5:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Xuan Zhuo, Jason Wang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, virtualization, netdev,
	Linus Torvalds, Jens Axboe, James Bottomley, Martin K. Petersen,
	Guenter Roeck, Greg KH
In-Reply-To: <20220816053602.173815-1-mst@redhat.com>

This reverts commit fbed86abba6e0472d98079790e58060e4332608a.
The API is now unused, let's not carry dead code around.

Fixes: fbed86abba6e ("virtio_mmio: support the arg sizes of find_vqs()")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_mmio.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index c492a57531c6..dfcecfd7aba1 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -360,7 +360,7 @@ static void vm_synchronize_cbs(struct virtio_device *vdev)
 
 static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index,
 				  void (*callback)(struct virtqueue *vq),
-				  const char *name, u32 size, bool ctx)
+				  const char *name, bool ctx)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
 	struct virtio_mmio_vq_info *info;
@@ -395,11 +395,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in
 		goto error_new_virtqueue;
 	}
 
-	if (!size || size > num)
-		size = num;
-
 	/* Create the vring */
-	vq = vring_create_virtqueue(index, size, VIRTIO_MMIO_VRING_ALIGN, vdev,
+	vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
 				 true, true, ctx, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
@@ -503,7 +500,6 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 		}
 
 		vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
-				     sizes ? sizes[i] : 0,
 				     ctx ? ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			vm_del_vqs(vdev);
-- 
MST


^ permalink raw reply related

* [PATCH v4 1/6] virtio_net: Revert "virtio_net: set the default max ring size by find_vqs()"
From: Michael S. Tsirkin @ 2022-08-16  5:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Xuan Zhuo, Jason Wang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, virtualization, netdev,
	Linus Torvalds, Jens Axboe, James Bottomley, Martin K. Petersen,
	Guenter Roeck, Greg KH, Andres Freund
In-Reply-To: <20220816053602.173815-1-mst@redhat.com>

This reverts commit 762faee5a2678559d3dc09d95f8f2c54cd0466a7.

This has been reported to trip up guests on GCP (Google Cloud).
The reason is that virtio_find_vqs_ctx_size is broken on legacy
devices. We can in theory fix virtio_find_vqs_ctx_size but
in fact the patch itself has several other issues:

- It treats unknown speed as < 10G
- It leaves userspace no way to find out the ring size set by hypervisor
- It tests speed when link is down
- It ignores the virtio spec advice:
        Both \field{speed} and \field{duplex} can change, thus the driver
        is expected to re-read these values after receiving a
        configuration change notification.
- It is not clear the performance impact has been tested properly

Revert the patch for now.

Reported-by: Andres Freund <andres@anarazel.de>
Link: https://lore.kernel.org/r/20220814212610.GA3690074%40roeck-us.net
Link: https://lore.kernel.org/r/20220815070203.plwjx7b3cyugpdt7%40awork3.anarazel.de
Link: https://lore.kernel.org/r/3df6bb82-1951-455d-a768-e9e1513eb667%40www.fastmail.com
Link: https://lore.kernel.org/r/FCDC5DDE-3CDD-4B8A-916F-CA7D87B547CE%40anarazel.de
Fixes: 762faee5a267 ("virtio_net: set the default max ring size by find_vqs()")
Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Andres Freund <andres@anarazel.de>
Tested-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/net/virtio_net.c | 42 ++++------------------------------------
 1 file changed, 4 insertions(+), 38 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d934774e9733..ece00b84e3a7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3432,29 +3432,6 @@ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqu
 		   (unsigned int)GOOD_PACKET_LEN);
 }
 
-static void virtnet_config_sizes(struct virtnet_info *vi, u32 *sizes)
-{
-	u32 i, rx_size, tx_size;
-
-	if (vi->speed == SPEED_UNKNOWN || vi->speed < SPEED_10000) {
-		rx_size = 1024;
-		tx_size = 1024;
-
-	} else if (vi->speed < SPEED_40000) {
-		rx_size = 1024 * 4;
-		tx_size = 1024 * 4;
-
-	} else {
-		rx_size = 1024 * 8;
-		tx_size = 1024 * 8;
-	}
-
-	for (i = 0; i < vi->max_queue_pairs; i++) {
-		sizes[rxq2vq(i)] = rx_size;
-		sizes[txq2vq(i)] = tx_size;
-	}
-}
-
 static int virtnet_find_vqs(struct virtnet_info *vi)
 {
 	vq_callback_t **callbacks;
@@ -3462,7 +3439,6 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 	int ret = -ENOMEM;
 	int i, total_vqs;
 	const char **names;
-	u32 *sizes;
 	bool *ctx;
 
 	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
@@ -3490,15 +3466,10 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 		ctx = NULL;
 	}
 
-	sizes = kmalloc_array(total_vqs, sizeof(*sizes), GFP_KERNEL);
-	if (!sizes)
-		goto err_sizes;
-
 	/* Parameters for control virtqueue, if any */
 	if (vi->has_cvq) {
 		callbacks[total_vqs - 1] = NULL;
 		names[total_vqs - 1] = "control";
-		sizes[total_vqs - 1] = 64;
 	}
 
 	/* Allocate/initialize parameters for send/receive virtqueues */
@@ -3513,10 +3484,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 			ctx[rxq2vq(i)] = true;
 	}
 
-	virtnet_config_sizes(vi, sizes);
-
-	ret = virtio_find_vqs_ctx_size(vi->vdev, total_vqs, vqs, callbacks,
-				       names, sizes, ctx, NULL);
+	ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,
+				  names, ctx, NULL);
 	if (ret)
 		goto err_find;
 
@@ -3536,8 +3505,6 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
 
 err_find:
-	kfree(sizes);
-err_sizes:
 	kfree(ctx);
 err_ctx:
 	kfree(names);
@@ -3897,9 +3864,6 @@ static int virtnet_probe(struct virtio_device *vdev)
 		vi->curr_queue_pairs = num_online_cpus();
 	vi->max_queue_pairs = max_queue_pairs;
 
-	virtnet_init_settings(dev);
-	virtnet_update_settings(vi);
-
 	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
 	err = init_vqs(vi);
 	if (err)
@@ -3912,6 +3876,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
 	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
 
+	virtnet_init_settings(dev);
+
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
 		vi->failover = net_failover_create(vi->dev);
 		if (IS_ERR(vi->failover)) {
-- 
MST


^ permalink raw reply related

* [PATCH v4 0/6] virtio: drop sizing vqs during init
From: Michael S. Tsirkin @ 2022-08-16  5:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Xuan Zhuo, Jason Wang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, virtualization, netdev,
	Linus Torvalds, Jens Axboe, James Bottomley, Martin K. Petersen,
	Guenter Roeck, Greg KH



Supplying size during init does not work for all transports.
In fact, for legacy PCI, doing that causes memory
corruption, which was reported on Google Cloud.

We might get away with changing size to size_hint so it's
safe to ignore and then fixing legacy to ignore the hint.

But the benefit is unclear in any case, so let's revert for now.
Any new version will have to come with
- documentation of performance gains
- performance testing showing existing workflows
  are not harmed materially, especially ones with
  bursty traffic
- report of testing on legacy devices


Huge shout out to Andres Freund for the effort spent reproducing and
debugging!  Thanks to Guenter Roeck for help with testing!


changes from v3
	added a vdpa revert
changes from v2
	drop unrelated patches
changes from v1
	revert the ring size api, it's unused now

Michael S. Tsirkin (6):
  virtio_net: Revert "virtio_net: set the default max ring size by
    find_vqs()"
  virtio: Revert "virtio: add helper virtio_find_vqs_ctx_size()"
  virtio-mmio: Revert "virtio_mmio: support the arg sizes of find_vqs()"
  virtio_pci: Revert "virtio_pci: support the arg sizes of find_vqs()"
  virtio_vdpa: Revert "virtio_vdpa: support the arg sizes of find_vqs()"
  virtio: Revert "virtio: find_vqs() add arg sizes"

 arch/um/drivers/virtio_uml.c             |  2 +-
 drivers/net/virtio_net.c                 | 42 +++---------------------
 drivers/platform/mellanox/mlxbf-tmfifo.c |  1 -
 drivers/remoteproc/remoteproc_virtio.c   |  1 -
 drivers/s390/virtio/virtio_ccw.c         |  1 -
 drivers/virtio/virtio_mmio.c             |  9 ++---
 drivers/virtio/virtio_pci_common.c       | 20 +++++------
 drivers/virtio/virtio_pci_common.h       |  3 +-
 drivers/virtio/virtio_pci_legacy.c       |  6 +---
 drivers/virtio/virtio_pci_modern.c       | 17 +++-------
 drivers/virtio/virtio_vdpa.c             | 16 ++++-----
 include/linux/virtio_config.h            | 26 +++------------
 12 files changed, 34 insertions(+), 110 deletions(-)

-- 
MST


^ permalink raw reply

* [PATCH v4 5/6] virtio: Revert "virtio_vdpa: support the arg sizes of find_vqs()"
From: Michael S. Tsirkin @ 2022-08-16  5:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Xuan Zhuo, Jason Wang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, virtualization, netdev,
	Linus Torvalds, Jens Axboe, James Bottomley, Martin K. Petersen,
	Guenter Roeck, Greg KH, Bo Liu
In-Reply-To: <20220816053602.173815-1-mst@redhat.com>

This reverts commit 99e8927d8a4da8eb8a8a5904dc13a3156be8e7c0: the
proposed API isn't supported on all transports but no
effort was made to address this.

It might not be hard to fix if we want to: maybe just rename size to
size_hint and make sure legacy transports ignore the hint.

But it's not clear what the benefit is in any case, so let's drop it.

Fixes: 99e8927d8a4d ("virtio_vdpa: support the arg sizes of find_vqs()")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_vdpa.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 9bc4d110b800..832d2c5b1b19 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -131,7 +131,7 @@ static irqreturn_t virtio_vdpa_virtqueue_cb(void *private)
 static struct virtqueue *
 virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
 		     void (*callback)(struct virtqueue *vq),
-		     const char *name, u32 size, bool ctx)
+		     const char *name, bool ctx)
 {
 	struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
 	struct vdpa_device *vdpa = vd_get_vdpa(vdev);
@@ -168,17 +168,14 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
 		goto error_new_virtqueue;
 	}
 
-	if (!size || size > max_num)
-		size = max_num;
-
 	if (ops->get_vq_num_min)
 		min_num = ops->get_vq_num_min(vdpa);
 
-	may_reduce_num = (size == min_num) ? false : true;
+	may_reduce_num = (max_num == min_num) ? false : true;
 
 	/* Create the vring */
 	align = ops->get_vq_align(vdpa);
-	vq = vring_create_virtqueue(index, size, align, vdev,
+	vq = vring_create_virtqueue(index, max_num, align, vdev,
 				    true, may_reduce_num, ctx,
 				    virtio_vdpa_notify, callback, name);
 	if (!vq) {
@@ -288,9 +285,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 			continue;
 		}
 
-		vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, callbacks[i],
-						  names[i], sizes ? sizes[i] : 0,
-						  ctx ? ctx[i] : false);
+		vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++,
+					      callbacks[i], names[i], ctx ?
+					      ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto err_setup_vq;
-- 
MST


^ permalink raw reply related

* [PATCH v4 5/6] virtio_vdpa: Revert "virtio_vdpa: support the arg sizes of find_vqs()"
From: Michael S. Tsirkin @ 2022-08-16  5:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Xuan Zhuo, Jason Wang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, virtualization, netdev,
	Linus Torvalds, Jens Axboe, James Bottomley, Martin K. Petersen,
	Guenter Roeck, Greg KH, Bo Liu
In-Reply-To: <20220816053602.173815-1-mst@redhat.com>

This reverts commit 99e8927d8a4da8eb8a8a5904dc13a3156be8e7c0: the
proposed API isn't supported on all transports but no
effort was made to address this.

It might not be hard to fix if we want to: maybe just rename size to
size_hint and make sure legacy transports ignore the hint.

But it's not clear what the benefit is in any case, so let's drop it.

Fixes: 99e8927d8a4d ("virtio_vdpa: support the arg sizes of find_vqs()")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_vdpa.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 9bc4d110b800..832d2c5b1b19 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -131,7 +131,7 @@ static irqreturn_t virtio_vdpa_virtqueue_cb(void *private)
 static struct virtqueue *
 virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
 		     void (*callback)(struct virtqueue *vq),
-		     const char *name, u32 size, bool ctx)
+		     const char *name, bool ctx)
 {
 	struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
 	struct vdpa_device *vdpa = vd_get_vdpa(vdev);
@@ -168,17 +168,14 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
 		goto error_new_virtqueue;
 	}
 
-	if (!size || size > max_num)
-		size = max_num;
-
 	if (ops->get_vq_num_min)
 		min_num = ops->get_vq_num_min(vdpa);
 
-	may_reduce_num = (size == min_num) ? false : true;
+	may_reduce_num = (max_num == min_num) ? false : true;
 
 	/* Create the vring */
 	align = ops->get_vq_align(vdpa);
-	vq = vring_create_virtqueue(index, size, align, vdev,
+	vq = vring_create_virtqueue(index, max_num, align, vdev,
 				    true, may_reduce_num, ctx,
 				    virtio_vdpa_notify, callback, name);
 	if (!vq) {
@@ -288,9 +285,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 			continue;
 		}
 
-		vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, callbacks[i],
-						  names[i], sizes ? sizes[i] : 0,
-						  ctx ? ctx[i] : false);
+		vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++,
+					      callbacks[i], names[i], ctx ?
+					      ctx[i] : false);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto err_setup_vq;
-- 
MST


^ permalink raw reply related

* [PATCH v4 6/6] virtio: Revert "virtio: find_vqs() add arg sizes"
From: Michael S. Tsirkin @ 2022-08-16  5:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: Xuan Zhuo, Jason Wang, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, virtualization, netdev,
	Linus Torvalds, Jens Axboe, James Bottomley, Martin K. Petersen,
	Guenter Roeck, Greg KH, Richard Weinberger, Anton Ivanov,
	Johannes Berg, Hans de Goede, Mark Gross, Vadim Pasternak,
	Bjorn Andersson, Mathieu Poirier, Cornelia Huck, Halil Pasic,
	Eric Farman, Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
	Christian Borntraeger, Sven Schnelle, linux-um,
	platform-driver-x86, linux-remoteproc, linux-s390, kvm
In-Reply-To: <20220816053602.173815-1-mst@redhat.com>

This reverts commit a10fba0377145fccefea4dc4dd5915b7ed87e546: the
proposed API isn't supported on all transports but no
effort was made to address this.

It might not be hard to fix if we want to: maybe just
rename size to size_hint and make sure legacy
transports ignore the hint.

But it's not clear what the benefit is in any case, so
let's drop it.

Fixes: a10fba037714 ("virtio: find_vqs() add arg sizes")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 arch/um/drivers/virtio_uml.c             |  2 +-
 drivers/platform/mellanox/mlxbf-tmfifo.c |  1 -
 drivers/remoteproc/remoteproc_virtio.c   |  1 -
 drivers/s390/virtio/virtio_ccw.c         |  1 -
 drivers/virtio/virtio_mmio.c             |  1 -
 drivers/virtio/virtio_pci_common.c       |  2 +-
 drivers/virtio/virtio_pci_common.h       |  2 +-
 drivers/virtio/virtio_pci_modern.c       |  7 ++-----
 drivers/virtio/virtio_vdpa.c             |  1 -
 include/linux/virtio_config.h            | 14 +++++---------
 10 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index 79e38afd4b91..e719af8bdf56 100644
--- a/arch/um/drivers/virtio_uml.c
+++ b/arch/um/drivers/virtio_uml.c
@@ -1011,7 +1011,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
 
 static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		       const char * const names[], u32 sizes[], const bool *ctx,
+		       const char * const names[], const bool *ctx,
 		       struct irq_affinity *desc)
 {
 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index 8be13d416f48..1ae3c56b66b0 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -928,7 +928,6 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
 					struct virtqueue *vqs[],
 					vq_callback_t *callbacks[],
 					const char * const names[],
-					u32 sizes[],
 					const bool *ctx,
 					struct irq_affinity *desc)
 {
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 81c4f5776109..0f7706e23eb9 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -158,7 +158,6 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 				 struct virtqueue *vqs[],
 				 vq_callback_t *callbacks[],
 				 const char * const names[],
-				 u32 sizes[],
 				 const bool * ctx,
 				 struct irq_affinity *desc)
 {
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 896896e32664..a10dbe632ef9 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -637,7 +637,6 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			       struct virtqueue *vqs[],
 			       vq_callback_t *callbacks[],
 			       const char * const names[],
-			       u32 sizes[],
 			       const bool *ctx,
 			       struct irq_affinity *desc)
 {
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index dfcecfd7aba1..3ff746e3f24a 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -474,7 +474,6 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 		       struct virtqueue *vqs[],
 		       vq_callback_t *callbacks[],
 		       const char * const names[],
-		       u32 sizes[],
 		       const bool *ctx,
 		       struct irq_affinity *desc)
 {
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 7ad734584823..ad258a9d3b9f 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -396,7 +396,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], u32 sizes[], const bool *ctx,
+		const char * const names[], const bool *ctx,
 		struct irq_affinity *desc)
 {
 	int err;
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index a5ff838b85a5..23112d84218f 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -110,7 +110,7 @@ void vp_del_vqs(struct virtio_device *vdev);
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], u32 sizes[], const bool *ctx,
+		const char * const names[], const bool *ctx,
 		struct irq_affinity *desc);
 const char *vp_bus_name(struct virtio_device *vdev);
 
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index be51ec849252..c3b9f2761849 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -347,15 +347,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 			      struct virtqueue *vqs[],
 			      vq_callback_t *callbacks[],
-			      const char * const names[],
-			      u32 sizes[],
-			      const bool *ctx,
+			      const char * const names[], const bool *ctx,
 			      struct irq_affinity *desc)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtqueue *vq;
-	int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, ctx,
-			     desc);
+	int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
 
 	if (rc)
 		return rc;
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 832d2c5b1b19..9670cc79371d 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -269,7 +269,6 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
 				struct virtqueue *vqs[],
 				vq_callback_t *callbacks[],
 				const char * const names[],
-				u32 sizes[],
 				const bool *ctx,
 				struct irq_affinity *desc)
 {
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 888f7e96f0c7..36ec7be1f480 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -55,7 +55,6 @@ struct virtio_shm_region {
  *		include a NULL entry for vqs that do not need a callback
  *	names: array of virtqueue names (mainly for debugging)
  *		include a NULL entry for vqs unused by driver
- *	sizes: array of virtqueue sizes
  *	Returns 0 on success or error status
  * @del_vqs: free virtqueues found by find_vqs().
  * @synchronize_cbs: synchronize with the virtqueue callbacks (optional)
@@ -104,9 +103,7 @@ struct virtio_config_ops {
 	void (*reset)(struct virtio_device *vdev);
 	int (*find_vqs)(struct virtio_device *, unsigned nvqs,
 			struct virtqueue *vqs[], vq_callback_t *callbacks[],
-			const char * const names[],
-			u32 sizes[],
-			const bool *ctx,
+			const char * const names[], const bool *ctx,
 			struct irq_affinity *desc);
 	void (*del_vqs)(struct virtio_device *);
 	void (*synchronize_cbs)(struct virtio_device *);
@@ -215,7 +212,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 	const char *names[] = { n };
 	struct virtqueue *vq;
 	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
-					 NULL, NULL);
+					 NULL);
 	if (err < 0)
 		return ERR_PTR(err);
 	return vq;
@@ -227,8 +224,7 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			const char * const names[],
 			struct irq_affinity *desc)
 {
-	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
-				      NULL, desc);
+	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
 }
 
 static inline
@@ -237,8 +233,8 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
 			const char * const names[], const bool *ctx,
 			struct irq_affinity *desc)
 {
-	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
-				      ctx, desc);
+	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
+				      desc);
 }
 
 /**
-- 
MST


^ permalink raw reply related

* KASAN: slab-out-of-bounds in ipvlan_queue_xmit
From: Jiacheng Xu @ 2022-08-16  5:37 UTC (permalink / raw)
  To: linux-kernel, davem, edumazet; +Cc: security, netdev

Hello,

When using a modified Syzkaller to fuzz the latest Linux kernel, the
following crash was triggered.

HEAD commit: 3d7cb6b04c3f Linux-5.19
git tree: upstream

kernel config: https://drive.google.com/file/d/1wgIUDwP5ho29AM-K7HhysSTfWFpfXYkG/view?usp=sharing
syz repro: https://drive.google.com/file/d/1GcF5GvQ2LkFCm_ij_gCwb2qOUSAbdzRT/view?usp=sharing
C reproducer: https://drive.google.com/file/d/1XcEYzdr7cGzdjP8epEUe2HMJ0DgbZRWl/view?usp=sharing

Environment:
Ubuntu 20.04 on Linux 5.4.0
QEMU 4.2.1:
qemu-system-x86_64 \
  -m 2G \
  -smp 2 \
  -kernel /home/workdir/bzImage \
  -append "console=ttyS0 root=/dev/sda earlyprintk=serial net.ifnames=0" \
  -drive file=/home/workdir/stretch.img,format=raw \
  -net user,host=10.0.2.10,hostfwd=tcp:127.0.0.1:10021-:22 \
  -net nic,model=e1000 \
  -enable-kvm \
  -nographic \
  -pidfile vm.pid \
  2>&1 | tee vm.log

If you fix this issue, please add the following tag to the commit:
Credits to Jiacheng Xu <stitch@zju.edu.cn>

==================================================================
BUG: KASAN: slab-out-of-bounds in ipvlan_queue_xmit+0x16c0/0x1950
Read of size 4 at addr ffff888042046fff by task repro/6447

CPU: 1 PID: 6447 Comm: repro Not tainted 5.19.0 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 4
Call Trace:
<TASK>
dump_stack_lvl+0xcd/0x134
print_report.cold+0xe5/0x659
? ipvlan_queue_xmit+0x16c0/0x1950
kasan_report+0x8a/0x1b0
? audit_kill_trees+0x2f0/0x300
? ipvlan_queue_xmit+0x16c0/0x1950
ipvlan_queue_xmit+0x16c0/0x1950
? ipvlan_handle_mode_l3+0x120/0x120
? skb_csum_hwoffload_help+0x1a0/0x1a0
? __sanitizer_cov_trace_pc+0x1a/0x40
? validate_xmit_xfrm+0x49d/0x10a0
? __sanitizer_cov_trace_pc+0x1a/0x40
? netif_skb_features+0x45b/0xbb0
? __sanitizer_cov_trace_pc+0x1a/0x40
? validate_xmit_skb+0x878/0xec0
? __sanitizer_cov_trace_pc+0x1a/0x40
ipvlan_start_xmit+0x45/0x190
__dev_direct_xmit+0x42d/0x630
? validate_xmit_skb_list+0x140/0x140
? packet_poll+0x4d0/0x4d0
? __sanitizer_cov_trace_pc+0x1a/0x40
? netdev_pick_tx+0x14f/0xbe0
packet_direct_xmit+0x1b8/0x2b0
packet_sendmsg+0x223e/0x4d50
? __sanitizer_cov_trace_pc+0x1a/0x40
? aa_label_sk_perm+0x89/0xe0
? __sanitizer_cov_trace_pc+0x1a/0x40
? aa_sk_perm+0x30f/0xa90
? tpacket_rcv+0x32c0/0x32c0
? aa_af_perm+0x230/0x230
? __sanitizer_cov_trace_pc+0x1a/0x40
? __sanitizer_cov_trace_pc+0x1a/0x40
? tpacket_rcv+0x32c0/0x32c0
sock_sendmsg+0xc3/0x120
__sys_sendto+0x21a/0x330
? __ia32_sys_getpeername+0xb0/0xb0
? x86_pmu_start+0x30/0x270
? syscall_enter_from_user_mode+0x1c/0x70
? rcu_read_lock_sched_held+0x9c/0xd0
? rcu_read_lock_bh_held+0xb0/0xb0
__x64_sys_sendto+0xdd/0x1b0
? lockdep_hardirqs_on+0x79/0x100
? syscall_enter_from_user_mode+0x21/0x70
do_syscall_64+0x35/0xb0
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7f2dc0ae4469
Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 8
RSP: 002b:00007fff157b1d38 EFLAGS: 00000216 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f2dc0ae4469
RDX: 000000000000000e RSI: 0000000020000100 RDI: 0000000000000005
RBP: 00007fff157b1d60 R08: 0000000020000140 R09: 0000000000000014
R10: 0000000000000000 R11: 0000000000000216 R12: 000055dc98e00f10
R13: 00007fff157b1e70 R14: 0000000000000000 R15: 0000000000000000
</TASK>
Allocated by task 0:
(stack is not available)
The buggy address belongs to the object at ffff888042046000
which belongs to the cache kmalloc-2k of size 2048
The buggy address is located 2047 bytes to the right of
2048-byte region [ffff888042046000, ffff888042046800)

The buggy address belongs to the physical page:
page:ffffea0001081000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x00
head:ffffea0001081000 order:3 compound_mapcount:0 compound_pincount:0
flags: 0x4fff00000010200(slab|head|node=1|zone=1|lastcpupid=0x7ff)
raw: 04fff00000010200 0000000000000000 dead000000000122 ffff888011842000
raw: 0000000000000000 0000000080080008 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd2a20(GFP_A4
prep_new_page+0x297/0x330
get_page_from_freelist+0x215f/0x3c50
__alloc_pages+0x321/0x710
alloc_pages+0x119/0x250
new_slab+0x2a9/0x3f0
___slab_alloc+0xd5a/0x1140
__slab_alloc.isra.0+0x4d/0xa0
__kmalloc_node_track_caller+0x321/0x440
kmalloc_reserve+0x32/0xd0
pskb_expand_head+0x148/0x1060
netlink_trim+0x1ea/0x240
netlink_broadcast+0x5b/0xd00
nlmsg_notify+0x8f/0x280
rtmsg_ifinfo_event.part.0+0xb6/0xe0
rtmsg_ifinfo+0x7f/0xa0
__dev_notify_flags+0x235/0x2c0
page last free stack trace:
free_pcp_prepare+0x51f/0xd00
free_unref_page+0x19/0x5b0
__unfreeze_partials+0x3d2/0x3f0
___cache_free+0x12c/0x140
qlist_free_all+0x6a/0x170
kasan_quarantine_reduce+0x13d/0x180
__kasan_slab_alloc+0xa2/0xc0
slab_post_alloc_hook+0x4d/0x4f0
kmem_cache_alloc+0x1be/0x460
getname_flags+0xd2/0x5b0
vfs_fstatat+0x73/0xb0
__do_sys_newlstat+0x8b/0x110
do_syscall_64+0x35/0xb0
entry_SYSCALL_64_after_hwframe+0x63/0xcd

Memory state around the buggy address:
ffff888042046e80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff888042046f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff888042046f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                                                                ^
ffff888042047000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff888042047080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ==================================================================
Kernel panic - not syncing: panic_on_warn set ...
CPU: 1 PID: 6447 Comm: repro Not tainted 5.19.0 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 4
Call Trace:
<TASK>
dump_stack_lvl+0xcd/0x134
panic+0x2d7/0x636
? panic_print_sys_info.part.0+0x10b/0x10b
? asm_sysvec_apic_timer_interrupt+0x16/0x20
? ipvlan_queue_xmit+0x16c0/0x1950
end_report.part.0+0x3f/0x7c
kasan_report.cold+0x8/0x12
? audit_kill_trees+0x2f0/0x300
? ipvlan_queue_xmit+0x16c0/0x1950
ipvlan_queue_xmit+0x16c0/0x1950
? ipvlan_handle_mode_l3+0x120/0x120
? skb_csum_hwoffload_help+0x1a0/0x1a0
? __sanitizer_cov_trace_pc+0x1a/0x40
? validate_xmit_xfrm+0x49d/0x10a0
? __sanitizer_cov_trace_pc+0x1a/0x40
? netif_skb_features+0x45b/0xbb0
? __sanitizer_cov_trace_pc+0x1a/0x40
? validate_xmit_skb+0x878/0xec0
? __sanitizer_cov_trace_pc+0x1a/0x40
ipvlan_start_xmit+0x45/0x190
__dev_direct_xmit+0x42d/0x630
? validate_xmit_skb_list+0x140/0x140
? packet_poll+0x4d0/0x4d0
? __sanitizer_cov_trace_pc+0x1a/0x40
? netdev_pick_tx+0x14f/0xbe0
packet_direct_xmit+0x1b8/0x2b0
packet_sendmsg+0x223e/0x4d50
? __sanitizer_cov_trace_pc+0x1a/0x40
? aa_label_sk_perm+0x89/0xe0
? __sanitizer_cov_trace_pc+0x1a/0x40
? aa_sk_perm+0x30f/0xa90
? tpacket_rcv+0x32c0/0x32c0
? aa_af_perm+0x230/0x230
? __sanitizer_cov_trace_pc+0x1a/0x40
? __sanitizer_cov_trace_pc+0x1a/0x40
? tpacket_rcv+0x32c0/0x32c0
sock_sendmsg+0xc3/0x120
__sys_sendto+0x21a/0x330
? __ia32_sys_getpeername+0xb0/0xb0
? x86_pmu_start+0x30/0x270
? syscall_enter_from_user_mode+0x1c/0x70
? rcu_read_lock_sched_held+0x9c/0xd0
? rcu_read_lock_bh_held+0xb0/0xb0
__x64_sys_sendto+0xdd/0x1b0
? lockdep_hardirqs_on+0x79/0x100
? syscall_enter_from_user_mode+0x21/0x70
do_syscall_64+0x35/0xb0
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7f2dc0ae4469
Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 8
RSP: 002b:00007fff157b1d38 EFLAGS: 00000216 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f2dc0ae4469
RDX: 000000000000000e RSI: 0000000020000100 RDI: 0000000000000005
RBP: 00007fff157b1d60 R08: 0000000020000140 R09: 0000000000000014
R10: 0000000000000000 R11: 0000000000000216 R12: 000055dc98e00f10
R13: 00007fff157b1e70 R14: 0000000000000000 R15: 0000000000000000
</TASK>

Best Regards,
Jiacheng

^ permalink raw reply

* [PATCH v4 1/3] dt-bindings: net: ti: k3-am654-cpsw-nuss: Update bindings for J7200 CPSW5G
From: Siddharth Vadapalli @ 2022-08-16  6:01 UTC (permalink / raw)
  To: davem, edumazet, kuba, pabeni, robh+dt, krzysztof.kozlowski+dt,
	linux, vladimir.oltean, grygorii.strashko, vigneshr, nsekhar
  Cc: netdev, devicetree, linux-kernel, kishon, s-vadapalli
In-Reply-To: <20220816060139.111934-1-s-vadapalli@ti.com>

Update bindings for the TI K3 J7200 SoC, which contains a 5-port
(4 external ports) CPSW5G module, and add a compatible for it.

Changes made:
    - Add new compatible ti,j7200-cpswxg-nuss for CPSW5G.
    - Extend pattern properties for new compatible.
    - Change maximum number of CPSW ports to 4 for new compatible.

Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
---
 .../bindings/net/ti,k3-am654-cpsw-nuss.yaml     | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index b8281d8be940..5366a367c387 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -57,6 +57,7 @@ properties:
       - ti,am654-cpsw-nuss
       - ti,j721e-cpsw-nuss
       - ti,am642-cpsw-nuss
+      - ti,j7200-cpswxg-nuss
 
   reg:
     maxItems: 1
@@ -110,7 +111,7 @@ properties:
         const: 0
 
     patternProperties:
-      port@[1-2]:
+      "^port@[1-4]$":
         type: object
         description: CPSWxG NUSS external ports
 
@@ -119,7 +120,7 @@ properties:
         properties:
           reg:
             minimum: 1
-            maximum: 2
+            maximum: 4
             description: CPSW port number
 
           phys:
@@ -151,6 +152,18 @@ properties:
 
     additionalProperties: false
 
+if:
+  not:
+    properties:
+      compatible:
+        contains:
+          const: ti,j7200-cpswxg-nuss
+then:
+  properties:
+    ethernet-ports:
+      patternProperties:
+        "^port@[3-4]$": false
+
 patternProperties:
   "^mdio@[0-9a-f]+$":
     type: object
-- 
2.25.1


^ permalink raw reply related

* [PATCH v4 3/3] net: ethernet: ti: am65-cpsw: Move phy_set_mode_ext() to correct location
From: Siddharth Vadapalli @ 2022-08-16  6:01 UTC (permalink / raw)
  To: davem, edumazet, kuba, pabeni, robh+dt, krzysztof.kozlowski+dt,
	linux, vladimir.oltean, grygorii.strashko, vigneshr, nsekhar
  Cc: netdev, devicetree, linux-kernel, kishon, s-vadapalli
In-Reply-To: <20220816060139.111934-1-s-vadapalli@ti.com>

In TI's J7200 SoC, each of the 4 CPSW5G ports can be configured
as a QSGMII main or QSGMII-SUB port. This configuration is performed
by the phy-gmii-sel driver when phy_set_mode_ext() is invoked.

It is necessary for the QSGMII main port to be configured before any of
the QSGMII-SUB interfaces are brought up. Currently, the QSGMII-SUB
interfaces come up before the QSGMII main port is configured.

Fix this by moving the call to phy_set_mode_ext() from
am65_cpsw_nuss_ndo_slave_open() to am65_cpsw_nuss_init_slave_ports(),
thereby ensuring that the QSGMII main port is configured before any of
the QSGMII-SUB ports are brought up.

Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
---
 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 033b40649308..7ef5d8208a4e 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -593,11 +593,6 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev)
 	/* mac_sl should be configured via phy-link interface */
 	am65_cpsw_sl_ctl_reset(port);
 
-	ret = phy_set_mode_ext(port->slave.ifphy, PHY_MODE_ETHERNET,
-			       port->slave.phy_if);
-	if (ret)
-		goto error_cleanup;
-
 	ret = phylink_of_phy_connect(port->slave.phylink, port->slave.phy_node, 0);
 	if (ret)
 		goto error_cleanup;
@@ -1898,6 +1893,10 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
 			goto of_node_put;
 		}
 
+		ret = phy_set_mode_ext(port->slave.ifphy, PHY_MODE_ETHERNET, port->slave.phy_if);
+		if (ret)
+			goto of_node_put;
+
 		ret = of_get_mac_address(port_np, port->slave.mac_addr);
 		if (ret) {
 			am65_cpsw_am654_get_efuse_macid(port_np,
-- 
2.25.1


^ permalink raw reply related

* [PATCH v4 2/3] net: ethernet: ti: am65-cpsw: Add support for J7200 CPSW5G
From: Siddharth Vadapalli @ 2022-08-16  6:01 UTC (permalink / raw)
  To: davem, edumazet, kuba, pabeni, robh+dt, krzysztof.kozlowski+dt,
	linux, vladimir.oltean, grygorii.strashko, vigneshr, nsekhar
  Cc: netdev, devicetree, linux-kernel, kishon, s-vadapalli
In-Reply-To: <20220816060139.111934-1-s-vadapalli@ti.com>

CPSW5G in J7200 supports additional modes like QSGMII and SGMII.
Add a new compatible for J7200 and enable QSGMII mode in the am65-cpsw
driver.

Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
---
 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 35 ++++++++++++++++++++++--
 drivers/net/ethernet/ti/am65-cpsw-nuss.h |  2 ++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index f4a6b590a1e3..033b40649308 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -74,6 +74,9 @@
 #define AM65_CPSW_PORTN_REG_TS_VLAN_LTYPE_REG	0x318
 #define AM65_CPSW_PORTN_REG_TS_CTL_LTYPE2       0x31C
 
+#define AM65_CPSW_SGMII_CONTROL_REG		0x010
+#define AM65_CPSW_SGMII_CONTROL_MR_AN_ENABLE	BIT(0)
+
 #define AM65_CPSW_CTL_VLAN_AWARE		BIT(1)
 #define AM65_CPSW_CTL_P0_ENABLE			BIT(2)
 #define AM65_CPSW_CTL_P0_TX_CRC_REMOVE		BIT(13)
@@ -1409,7 +1412,14 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = {
 static void am65_cpsw_nuss_mac_config(struct phylink_config *config, unsigned int mode,
 				      const struct phylink_link_state *state)
 {
-	/* Currently not used */
+	struct am65_cpsw_slave_data *slave = container_of(config, struct am65_cpsw_slave_data,
+							  phylink_config);
+	struct am65_cpsw_port *port = container_of(slave, struct am65_cpsw_port, slave);
+	struct am65_cpsw_common *common = port->common;
+
+	if (common->pdata.extra_modes & BIT(state->interface))
+		writel(AM65_CPSW_SGMII_CONTROL_MR_AN_ENABLE,
+		       port->sgmii_base + AM65_CPSW_SGMII_CONTROL_REG);
 }
 
 static void am65_cpsw_nuss_mac_link_down(struct phylink_config *config, unsigned int mode,
@@ -1847,6 +1857,8 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
 		port->common = common;
 		port->port_base = common->cpsw_base + AM65_CPSW_NU_PORTS_BASE +
 				  AM65_CPSW_NU_PORTS_OFFSET * (port_id);
+		if (common->pdata.extra_modes)
+			port->sgmii_base = common->ss_base + AM65_CPSW_SGMII_BASE * (port_id);
 		port->stat_base = common->cpsw_base + AM65_CPSW_NU_STATS_BASE +
 				  (AM65_CPSW_NU_STATS_PORT_OFFSET * port_id);
 		port->name = of_get_property(port_np, "label", NULL);
@@ -1981,7 +1993,18 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
 	port->slave.phylink_config.type = PHYLINK_NETDEV;
 	port->slave.phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD;
 
-	phy_interface_set_rgmii(port->slave.phylink_config.supported_interfaces);
+	if (phy_interface_mode_is_rgmii(port->slave.phy_if)) {
+		phy_interface_set_rgmii(port->slave.phylink_config.supported_interfaces);
+	} else if (port->slave.phy_if == PHY_INTERFACE_MODE_RMII) {
+		__set_bit(PHY_INTERFACE_MODE_RMII,
+			  port->slave.phylink_config.supported_interfaces);
+	} else if (common->pdata.extra_modes & BIT(port->slave.phy_if)) {
+		__set_bit(PHY_INTERFACE_MODE_QSGMII,
+			  port->slave.phylink_config.supported_interfaces);
+	} else {
+		dev_err(dev, "selected phy-mode is not supported\n");
+		return -EOPNOTSUPP;
+	}
 
 	phylink = phylink_create(&port->slave.phylink_config,
 				 of_node_to_fwnode(port->slave.phy_node),
@@ -2611,10 +2634,18 @@ static const struct am65_cpsw_pdata am64x_cpswxg_pdata = {
 	.fdqring_mode = K3_RINGACC_RING_MODE_RING,
 };
 
+static const struct am65_cpsw_pdata j7200_cpswxg_pdata = {
+	.quirks = 0,
+	.ale_dev_id = "am64-cpswxg",
+	.fdqring_mode = K3_RINGACC_RING_MODE_RING,
+	.extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII),
+};
+
 static const struct of_device_id am65_cpsw_nuss_of_mtable[] = {
 	{ .compatible = "ti,am654-cpsw-nuss", .data = &am65x_sr1_0},
 	{ .compatible = "ti,j721e-cpsw-nuss", .data = &j721e_pdata},
 	{ .compatible = "ti,am642-cpsw-nuss", .data = &am64x_cpswxg_pdata},
+	{ .compatible = "ti,j7200-cpswxg-nuss", .data = &j7200_cpswxg_pdata},
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, am65_cpsw_nuss_of_mtable);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index ac945631bf2f..2c9850fdfcb6 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -46,6 +46,7 @@ struct am65_cpsw_port {
 	const char			*name;
 	u32				port_id;
 	void __iomem			*port_base;
+	void __iomem			*sgmii_base;
 	void __iomem			*stat_base;
 	void __iomem			*fetch_ram_base;
 	bool				disabled;
@@ -88,6 +89,7 @@ struct am65_cpsw_rx_chn {
 
 struct am65_cpsw_pdata {
 	u32	quirks;
+	u64	extra_modes;
 	enum k3_ring_mode fdqring_mode;
 	const char	*ale_dev_id;
 };
-- 
2.25.1


^ permalink raw reply related

* [PATCH v4 0/3] J7200: CPSW5G: Add support for QSGMII mode to am65-cpsw driver
From: Siddharth Vadapalli @ 2022-08-16  6:01 UTC (permalink / raw)
  To: davem, edumazet, kuba, pabeni, robh+dt, krzysztof.kozlowski+dt,
	linux, vladimir.oltean, grygorii.strashko, vigneshr, nsekhar
  Cc: netdev, devicetree, linux-kernel, kishon, s-vadapalli

Add support for QSGMII mode to am65-cpsw driver.

Change log:

v3 -> v4:
1. Update bindings to disallow ports based on compatible, instead of
   adding a new if/then statement for the new compatible.
2. Add an else-if condition for RMII mode in the set of supported interfaces.
   Support for RMII mode is already present in the driver, but I had
   missed adding a condition for RMII mode in the previous patches.

v2 -> v3:
1. In ti,k3-am654-cpsw-nuss.yaml, restrict if/then statement to port
   nodes.

v1 -> v2:
1. Add new compatible for CPSW5G in ti,k3-am654-cpsw-nuss.yaml and extend
   properties for new compatible.
2. Add extra_modes member to struct am65_cpsw_pdata to be used for QSGMII
   mode by new compatible.
3. Add check for phylink supported modes to ensure that only one phy mode
   is advertised as supported.
4. Check if extra_modes supports QSGMII mode in am65_cpsw_nuss_mac_config()
   for register write.
5. Add check for assigning port->sgmii_base only when extra_modes is valid.

v3: https://lore.kernel.org/r/20220606110443.30362-1-s-vadapalli@ti.com/
v2: https://lore.kernel.org/r/20220602114558.6204-1-s-vadapalli@ti.com/
v1: https://lore.kernel.org/r/20220531113058.23708-1-s-vadapalli@ti.com/

Siddharth Vadapalli (3):
  dt-bindings: net: ti: k3-am654-cpsw-nuss: Update bindings for J7200
    CPSW5G
  net: ethernet: ti: am65-cpsw: Add support for J7200 CPSW5G
  net: ethernet: ti: am65-cpsw: Move phy_set_mode_ext() to correct
    location

 .../bindings/net/ti,k3-am654-cpsw-nuss.yaml   | 17 ++++++-
 drivers/net/ethernet/ti/am65-cpsw-nuss.c      | 44 ++++++++++++++++---
 drivers/net/ethernet/ti/am65-cpsw-nuss.h      |  2 +
 3 files changed, 54 insertions(+), 9 deletions(-)

--
2.25.1


^ permalink raw reply

* Re: BUG: corrupted list in insert_work
From: syzbot @ 2022-08-16  5:58 UTC (permalink / raw)
  To: andrii, ast, bpf, brauner, cgroups, daniel, dvyukov, hannes,
	hdanton, john.fastabend, kafai, kpsingh, linux-kernel, linux-mm,
	lizefan.x, lkp, lkp, mkoutny, netdev, oliver.sang, songliubraving,
	stable, syzkaller-android-bugs, syzkaller-bugs, tadeusz.struk, tj,
	yhs
In-Reply-To: <000000000000aaac8505dc135b07@google.com>

syzbot suspects this issue was fixed by commit:

commit d007f49ab789bee8ed76021830b49745d5feaf61
Author: Al Viro <viro@zeniv.linux.org.uk>
Date:   Wed May 18 06:13:40 2022 +0000

    percpu_ref_init(): clean ->percpu_count_ref on failure

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=10c66b6b080000
start commit:   ebc9fb07d294 ANDROID: random: fix CRC issues with the merge
git tree:       android12-5.10-lts
kernel config:  https://syzkaller.appspot.com/x/.config?x=32c952ff4a8ff8c1
dashboard link: https://syzkaller.appspot.com/bug?extid=e42ae441c3b10acf9e9d
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=172a9074080000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=10456caa080000

If the result looks correct, please mark the issue as fixed by replying with:

#syz fix: percpu_ref_init(): clean ->percpu_count_ref on failure

For information about bisection process see: https://goo.gl/tpsmEJ#bisection

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox