LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v4 3/8] bpf powerpc: refactor JIT compiler code
From: Christophe Leroy @ 2021-09-29 11:42 UTC (permalink / raw)
  To: Hari Bathini, naveen.n.rao, mpe, ast, daniel
  Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
	yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210929111855.50254-4-hbathini@linux.ibm.com>



Le 29/09/2021 à 13:18, Hari Bathini a écrit :
> Refactor powerpc LDX JITing code to simplify adding BPF_PROBE_MEM
> support.
> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>

Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>

> ---
> 
> Changes in v4:
> * Dropped the default case in the switch statement for bpf size.
> * Dropped explicit fallthrough statement for empty switch cases.
> 
> 
>   arch/powerpc/net/bpf_jit_comp32.c | 33 ++++++++++++++++++-------------
>   arch/powerpc/net/bpf_jit_comp64.c | 31 +++++++++++++++++------------
>   2 files changed, 37 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index b60b59426a24..65a4d1ed97bf 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -282,6 +282,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
>   		u32 src_reg_h = src_reg - 1;
>   		u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
> +		u32 size = BPF_SIZE(code);
>   		s16 off = insn[i].off;
>   		s32 imm = insn[i].imm;
>   		bool func_addr_fixed;
> @@ -810,23 +811,27 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		 * BPF_LDX
>   		 */
>   		case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
> -			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> -			if (!fp->aux->verifier_zext)
> -				EMIT(PPC_RAW_LI(dst_reg_h, 0));
> -			break;
>   		case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
> -			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> -			if (!fp->aux->verifier_zext)
> -				EMIT(PPC_RAW_LI(dst_reg_h, 0));
> -			break;
>   		case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
> -			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> -			if (!fp->aux->verifier_zext)
> -				EMIT(PPC_RAW_LI(dst_reg_h, 0));
> -			break;
>   		case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
> -			EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
> -			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
> +			switch (size) {
> +			case BPF_B:
> +				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> +				break;
> +			case BPF_H:
> +				EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> +				break;
> +			case BPF_W:
> +				EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> +				break;
> +			case BPF_DW:
> +				EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
> +				EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
> +				break;
> +			}
> +
> +			if (size != BPF_DW && !fp->aux->verifier_zext)
> +				EMIT(PPC_RAW_LI(dst_reg_h, 0));
>   			break;
>   
>   		/*
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 2a87da50d9a4..49e6e0b6e4d2 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -285,6 +285,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		u32 code = insn[i].code;
>   		u32 dst_reg = b2p[insn[i].dst_reg];
>   		u32 src_reg = b2p[insn[i].src_reg];
> +		u32 size = BPF_SIZE(code);
>   		s16 off = insn[i].off;
>   		s32 imm = insn[i].imm;
>   		bool func_addr_fixed;
> @@ -716,25 +717,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   		 */
>   		/* dst = *(u8 *)(ul) (src + off) */
>   		case BPF_LDX | BPF_MEM | BPF_B:
> -			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> -			if (insn_is_zext(&insn[i + 1]))
> -				addrs[++i] = ctx->idx * 4;
> -			break;
>   		/* dst = *(u16 *)(ul) (src + off) */
>   		case BPF_LDX | BPF_MEM | BPF_H:
> -			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> -			if (insn_is_zext(&insn[i + 1]))
> -				addrs[++i] = ctx->idx * 4;
> -			break;
>   		/* dst = *(u32 *)(ul) (src + off) */
>   		case BPF_LDX | BPF_MEM | BPF_W:
> -			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> -			if (insn_is_zext(&insn[i + 1]))
> -				addrs[++i] = ctx->idx * 4;
> -			break;
>   		/* dst = *(u64 *)(ul) (src + off) */
>   		case BPF_LDX | BPF_MEM | BPF_DW:
> -			PPC_BPF_LL(dst_reg, src_reg, off);
> +			switch (size) {
> +			case BPF_B:
> +				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> +				break;
> +			case BPF_H:
> +				EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> +				break;
> +			case BPF_W:
> +				EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> +				break;
> +			case BPF_DW:
> +				PPC_BPF_LL(dst_reg, src_reg, off);
> +				break;
> +			}
> +
> +			if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
> +				addrs[++i] = ctx->idx * 4;
>   			break;
>   
>   		/*
> 

^ permalink raw reply

* [PATCH v4 8/8] bpf ppc32: Access only if addr is kernel address
From: Hari Bathini @ 2021-09-29 11:18 UTC (permalink / raw)
  To: naveen.n.rao, christophe.leroy, mpe, ast, daniel
  Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
	yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210929111855.50254-1-hbathini@linux.ibm.com>

With KUAP enabled, any kernel code which wants to access userspace
needs to be surrounded by disable-enable KUAP. But that is not
happening for BPF_PROBE_MEM load instruction. Though PPC32 does not
support read protection, considering the fact that PTR_TO_BTF_ID
(which uses BPF_PROBE_MEM mode) could either be a valid kernel pointer
or NULL but should never be a pointer to userspace address, execute
BPF_PROBE_MEM load only if addr is kernel address, otherwise set
dst_reg=0 and move on.

This will catch NULL, valid or invalid userspace pointers. Only bad
kernel pointer will be handled by BPF exception table.

[Alexei suggested for x86]
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---

Changes in v4:
* Adjusted the emit code to avoid using temporary reg.


 arch/powerpc/net/bpf_jit_comp32.c | 34 +++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 6ee13a09c70d..2ac81563c78d 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -818,6 +818,40 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
 		case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
 		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+			/*
+			 * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
+			 * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
+			 * load only if addr is kernel address (see is_kernel_addr()), otherwise
+			 * set dst_reg=0 and move on.
+			 */
+			if (BPF_MODE(code) == BPF_PROBE_MEM) {
+				PPC_LI32(_R0, TASK_SIZE - off);
+				EMIT(PPC_RAW_CMPLW(src_reg, _R0));
+				PPC_BCC(COND_GT, (ctx->idx + 5) * 4);
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				/*
+				 * For BPF_DW case, "li reg_h,0" would be needed when
+				 * !fp->aux->verifier_zext. Emit NOP otherwise.
+				 *
+				 * Note that "li reg_h,0" is emitted for BPF_B/H/W case,
+				 * if necessary. So, jump there insted of emitting an
+				 * additional "li reg_h,0" instruction.
+				 */
+				if (size == BPF_DW && !fp->aux->verifier_zext)
+					EMIT(PPC_RAW_LI(dst_reg_h, 0));
+				else
+					EMIT(PPC_RAW_NOP());
+				/*
+				 * Need to jump two instructions instead of one for BPF_DW case
+				 * as there are two load instructions for dst_reg_h & dst_reg
+				 * respectively.
+				 */
+				if (size == BPF_DW)
+					PPC_JMP((ctx->idx + 3) * 4);
+				else
+					PPC_JMP((ctx->idx + 2) * 4);
+			}
+
 			switch (size) {
 			case BPF_B:
 				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
-- 
2.31.1


^ permalink raw reply related

* [PATCH v4 3/8] bpf powerpc: refactor JIT compiler code
From: Hari Bathini @ 2021-09-29 11:18 UTC (permalink / raw)
  To: naveen.n.rao, christophe.leroy, mpe, ast, daniel
  Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
	yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210929111855.50254-1-hbathini@linux.ibm.com>

Refactor powerpc LDX JITing code to simplify adding BPF_PROBE_MEM
support.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---

Changes in v4:
* Dropped the default case in the switch statement for bpf size.
* Dropped explicit fallthrough statement for empty switch cases.


 arch/powerpc/net/bpf_jit_comp32.c | 33 ++++++++++++++++++-------------
 arch/powerpc/net/bpf_jit_comp64.c | 31 +++++++++++++++++------------
 2 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index b60b59426a24..65a4d1ed97bf 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -282,6 +282,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
 		u32 src_reg_h = src_reg - 1;
 		u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
+		u32 size = BPF_SIZE(code);
 		s16 off = insn[i].off;
 		s32 imm = insn[i].imm;
 		bool func_addr_fixed;
@@ -810,23 +811,27 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		 * BPF_LDX
 		 */
 		case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
-			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
-			if (!fp->aux->verifier_zext)
-				EMIT(PPC_RAW_LI(dst_reg_h, 0));
-			break;
 		case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
-			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
-			if (!fp->aux->verifier_zext)
-				EMIT(PPC_RAW_LI(dst_reg_h, 0));
-			break;
 		case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
-			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
-			if (!fp->aux->verifier_zext)
-				EMIT(PPC_RAW_LI(dst_reg_h, 0));
-			break;
 		case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
-			EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
-			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+			switch (size) {
+			case BPF_B:
+				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+				break;
+			case BPF_H:
+				EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+				break;
+			case BPF_W:
+				EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+				break;
+			case BPF_DW:
+				EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+				EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+				break;
+			}
+
+			if (size != BPF_DW && !fp->aux->verifier_zext)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
 			break;
 
 		/*
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 2a87da50d9a4..49e6e0b6e4d2 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -285,6 +285,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		u32 code = insn[i].code;
 		u32 dst_reg = b2p[insn[i].dst_reg];
 		u32 src_reg = b2p[insn[i].src_reg];
+		u32 size = BPF_SIZE(code);
 		s16 off = insn[i].off;
 		s32 imm = insn[i].imm;
 		bool func_addr_fixed;
@@ -716,25 +717,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		 */
 		/* dst = *(u8 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_B:
-			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
-			if (insn_is_zext(&insn[i + 1]))
-				addrs[++i] = ctx->idx * 4;
-			break;
 		/* dst = *(u16 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_H:
-			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
-			if (insn_is_zext(&insn[i + 1]))
-				addrs[++i] = ctx->idx * 4;
-			break;
 		/* dst = *(u32 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_W:
-			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
-			if (insn_is_zext(&insn[i + 1]))
-				addrs[++i] = ctx->idx * 4;
-			break;
 		/* dst = *(u64 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_DW:
-			PPC_BPF_LL(dst_reg, src_reg, off);
+			switch (size) {
+			case BPF_B:
+				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+				break;
+			case BPF_H:
+				EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+				break;
+			case BPF_W:
+				EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+				break;
+			case BPF_DW:
+				PPC_BPF_LL(dst_reg, src_reg, off);
+				break;
+			}
+
+			if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
+				addrs[++i] = ctx->idx * 4;
 			break;
 
 		/*
-- 
2.31.1


^ permalink raw reply related

* [PATCH v4 7/8] bpf ppc32: Add BPF_PROBE_MEM support for JIT
From: Hari Bathini @ 2021-09-29 11:18 UTC (permalink / raw)
  To: naveen.n.rao, christophe.leroy, mpe, ast, daniel
  Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
	yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210929111855.50254-1-hbathini@linux.ibm.com>

BPF load instruction with BPF_PROBE_MEM mode can cause a fault
inside kernel. Append exception table for such instructions
within BPF program.

Unlike other archs which uses extable 'fixup' field to pass dest_reg
and nip, BPF exception table on PowerPC follows the generic PowerPC
exception table design, where it populates both fixup and extable
sections within BPF program. fixup section contains 3 instructions,
first 2 instructions clear dest_reg (lower & higher 32-bit registers)
and last instruction jumps to next instruction in the BPF code.
extable 'insn' field contains relative offset of the instruction and
'fixup' field contains relative offset of the fixup entry. Example
layout of BPF program with extable present:

             +------------------+
             |                  |
             |                  |
   0x4020 -->| lwz   r28,4(r4)  |
             |                  |
             |                  |
   0x40ac -->| lwz  r3,0(r24)   |
             | lwz  r4,4(r24)   |
             |                  |
             |                  |
             |------------------|
   0x4278 -->| li  r28,0        |  \
             | li  r27,0        |  | fixup entry
             | b   0x4024       |  /
   0x4284 -->| li  r4,0         |
             | li  r3,0         |
             | b   0x40b4       |
             |------------------|
   0x4290 -->| insn=0xfffffd90  |  \ extable entry
             | fixup=0xffffffe4 |  /
   0x4298 -->| insn=0xfffffe14  |
             | fixup=0xffffffe8 |
             +------------------+

   (Addresses shown here are chosen random, not real)

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---

Changes in v4:
* Dropped explicit fallthrough statement for empty switch cases.


 arch/powerpc/net/bpf_jit.h        |  4 ++++
 arch/powerpc/net/bpf_jit_comp.c   |  2 ++
 arch/powerpc/net/bpf_jit_comp32.c | 30 ++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 561689a2abdf..800734056200 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -144,7 +144,11 @@ struct codegen_context {
 	unsigned int exentry_idx;
 };
 
+#ifdef CONFIG_PPC32
+#define BPF_FIXUP_LEN	3 /* Three instructions => 12 bytes */
+#else
 #define BPF_FIXUP_LEN	2 /* Two instructions => 8 bytes */
+#endif
 
 static inline void bpf_flush_icache(void *start, void *end)
 {
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index f02457c6b54f..1a0041997050 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -297,6 +297,8 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
 		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
 
 	fixup[0] = PPC_RAW_LI(dst_reg, 0);
+	if (IS_ENABLED(CONFIG_PPC32))
+		fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */
 
 	fixup[BPF_FIXUP_LEN - 1] =
 		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index c04291517a7e..6ee13a09c70d 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -811,9 +811,13 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		 * BPF_LDX
 		 */
 		case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
 		case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+		case BPF_LDX | BPF_PROBE_MEM | BPF_H:
 		case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
 		case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
 			switch (size) {
 			case BPF_B:
 				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
@@ -832,6 +836,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 
 			if (size != BPF_DW && !fp->aux->verifier_zext)
 				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+
+			if (BPF_MODE(code) == BPF_PROBE_MEM) {
+				int insn_idx = ctx->idx - 1;
+				int jmp_off = 4;
+
+				/*
+				 * In case of BPF_DW, two lwz instructions are emitted, one
+				 * for higher 32-bit and another for lower 32-bit. So, set
+				 * ex->insn to the first of the two and jump over both
+				 * instructions in fixup.
+				 *
+				 * Similarly, with !verifier_zext, two instructions are
+				 * emitted for BPF_B/H/W case. So, set ex->insn to the
+				 * instruction that could fault and skip over both
+				 * instructions.
+				 */
+				if (size == BPF_DW || !fp->aux->verifier_zext) {
+					insn_idx -= 1;
+					jmp_off += 4;
+				}
+
+				ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
+							    jmp_off, dst_reg);
+				if (ret)
+					return ret;
+			}
 			break;
 
 		/*
-- 
2.31.1


^ permalink raw reply related

* [PATCH v4 6/8] bpf ppc64: Access only if addr is kernel address
From: Hari Bathini @ 2021-09-29 11:18 UTC (permalink / raw)
  To: naveen.n.rao, christophe.leroy, mpe, ast, daniel
  Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
	kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210929111855.50254-1-hbathini@linux.ibm.com>

From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>

On PPC64 with KUAP enabled, any kernel code which wants to
access userspace needs to be surrounded by disable-enable KUAP.
But that is not happening for BPF_PROBE_MEM load instruction.
So, when BPF program tries to access invalid userspace address,
page-fault handler considers it as bad KUAP fault:

  Kernel attempted to read user page (d0000000) - exploit attempt? (uid: 0)

Considering the fact that PTR_TO_BTF_ID (which uses BPF_PROBE_MEM
mode) could either be a valid kernel pointer or NULL but should
never be a pointer to userspace address, execute BPF_PROBE_MEM load
only if addr is kernel address, otherwise set dst_reg=0 and move on.

This will catch NULL, valid or invalid userspace pointers. Only bad
kernel pointer will be handled by BPF exception table.

[Alexei suggested for x86]
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---

Changes in v4:
* Used IS_ENABLED() instead of #ifdef.
* Dropped the else case that is not applicable for PPC64.


 arch/powerpc/net/bpf_jit_comp64.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 4170999371ee..e1ea64081ae1 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -727,6 +727,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		/* dst = *(u64 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_DW:
 		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+			/*
+			 * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
+			 * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
+			 * load only if addr is kernel address (see is_kernel_addr()), otherwise
+			 * set dst_reg=0 and move on.
+			 */
+			if (BPF_MODE(code) == BPF_PROBE_MEM) {
+				EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off));
+				if (IS_ENABLED(CONFIG_PPC_BOOK3E_64))
+					PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul);
+				else /* BOOK3S_64 */
+					PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET);
+				EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2]));
+				PPC_BCC(COND_GT, (ctx->idx + 4) * 4);
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				/*
+				 * Check if 'off' is word aligned because PPC_BPF_LL()
+				 * (BPF_DW case) generates two instructions if 'off' is not
+				 * word-aligned and one instruction otherwise.
+				 */
+				if (BPF_SIZE(code) == BPF_DW && (off & 3))
+					PPC_JMP((ctx->idx + 3) * 4);
+				else
+					PPC_JMP((ctx->idx + 2) * 4);
+			}
+
 			switch (size) {
 			case BPF_B:
 				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
-- 
2.31.1


^ permalink raw reply related

* [PATCH v4 5/8] bpf ppc64: Add BPF_PROBE_MEM support for JIT
From: Hari Bathini @ 2021-09-29 11:18 UTC (permalink / raw)
  To: naveen.n.rao, christophe.leroy, mpe, ast, daniel
  Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
	kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210929111855.50254-1-hbathini@linux.ibm.com>

From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>

BPF load instruction with BPF_PROBE_MEM mode can cause a fault
inside kernel. Append exception table for such instructions
within BPF program.

Unlike other archs which uses extable 'fixup' field to pass dest_reg
and nip, BPF exception table on PowerPC follows the generic PowerPC
exception table design, where it populates both fixup and extable
sections within BPF program. fixup section contains two instructions,
first instruction clears dest_reg and 2nd jumps to next instruction
in the BPF code. extable 'insn' field contains relative offset of
the instruction and 'fixup' field contains relative offset of the
fixup entry. Example layout of BPF program with extable present:

             +------------------+
             |                  |
             |                  |
   0x4020 -->| ld   r27,4(r3)   |
             |                  |
             |                  |
   0x40ac -->| lwz  r3,0(r4)    |
             |                  |
             |                  |
             |------------------|
   0x4280 -->| li  r27,0        |  \ fixup entry
             | b   0x4024       |  /
   0x4288 -->| li  r3,0         |
             | b   0x40b0       |
             |------------------|
   0x4290 -->| insn=0xfffffd90  |  \ extable entry
             | fixup=0xffffffec |  /
   0x4298 -->| insn=0xfffffe14  |
             | fixup=0xffffffec |
             +------------------+

   (Addresses shown here are chosen random, not real)

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---

Changes in v4:
* Dropped explicit fallthrough statement for empty switch cases.


 arch/powerpc/net/bpf_jit.h        |  8 +++-
 arch/powerpc/net/bpf_jit_comp.c   | 70 ++++++++++++++++++++++++++++---
 arch/powerpc/net/bpf_jit_comp32.c |  2 +-
 arch/powerpc/net/bpf_jit_comp64.c | 13 +++++-
 4 files changed, 84 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 0c8f885b8f48..561689a2abdf 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -141,8 +141,11 @@ struct codegen_context {
 	unsigned int idx;
 	unsigned int stack_size;
 	int b2p[ARRAY_SIZE(b2p)];
+	unsigned int exentry_idx;
 };
 
+#define BPF_FIXUP_LEN	2 /* Two instructions => 8 bytes */
+
 static inline void bpf_flush_icache(void *start, void *end)
 {
 	smp_wmb();	/* smp write barrier */
@@ -166,11 +169,14 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
 
 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
-		       u32 *addrs);
+		       u32 *addrs, int pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
 
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
+			  int insn_idx, int jmp_off, int dst_reg);
+
 #endif
 
 #endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index c5c9e8ad1de7..f02457c6b54f 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	struct bpf_prog *tmp_fp;
 	bool bpf_blinded = false;
 	bool extra_pass = false;
+	u32 extable_len;
+	u32 fixup_len;
 
 	if (!fp->jit_requested)
 		return org_fp;
@@ -131,7 +133,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		image = jit_data->image;
 		bpf_hdr = jit_data->header;
 		proglen = jit_data->proglen;
-		alloclen = proglen + FUNCTION_DESCR_SIZE;
 		extra_pass = true;
 		goto skip_init_ctx;
 	}
@@ -149,7 +150,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
 		goto out_addrs;
@@ -162,7 +163,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	 */
 	if (cgctx.seen & SEEN_TAILCALL) {
 		cgctx.idx = 0;
-		if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
 			fp = org_fp;
 			goto out_addrs;
 		}
@@ -177,8 +178,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	bpf_jit_build_prologue(0, &cgctx);
 	bpf_jit_build_epilogue(0, &cgctx);
 
+	fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
+	extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
+
 	proglen = cgctx.idx * 4;
-	alloclen = proglen + FUNCTION_DESCR_SIZE;
+	alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
 
 	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
 	if (!bpf_hdr) {
@@ -186,6 +190,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		goto out_addrs;
 	}
 
+	if (extable_len)
+		fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
+
 skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
 
@@ -210,7 +217,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		/* Now build the prologue, body code & epilogue for real. */
 		cgctx.idx = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+		if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) {
+			bpf_jit_binary_free(bpf_hdr);
+			fp = org_fp;
+			goto out_addrs;
+		}
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
 		if (bpf_jit_enable > 1)
@@ -234,7 +245,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 
 	fp->bpf_func = (void *)image;
 	fp->jited = 1;
-	fp->jited_len = alloclen;
+	fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
 
 	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
 	bpf_jit_binary_lock_ro(bpf_hdr);
@@ -258,3 +269,50 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 
 	return fp;
 }
+
+/*
+ * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
+ * this function, as this only applies to BPF_PROBE_MEM, for now.
+ */
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
+			  int insn_idx, int jmp_off, int dst_reg)
+{
+	off_t offset;
+	unsigned long pc;
+	struct exception_table_entry *ex;
+	u32 *fixup;
+
+	/* Populate extable entries only in the last pass */
+	if (pass != 2)
+		return 0;
+
+	if (!fp->aux->extable ||
+	    WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
+		return -EINVAL;
+
+	pc = (unsigned long)&image[insn_idx];
+
+	fixup = (void *)fp->aux->extable -
+		(fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
+		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
+
+	fixup[0] = PPC_RAW_LI(dst_reg, 0);
+
+	fixup[BPF_FIXUP_LEN - 1] =
+		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
+
+	ex = &fp->aux->extable[ctx->exentry_idx];
+
+	offset = pc - (long)&ex->insn;
+	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+		return -ERANGE;
+	ex->insn = offset;
+
+	offset = (long)fixup - (long)&ex->fixup;
+	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+		return -ERANGE;
+	ex->fixup = offset;
+
+	ctx->exentry_idx++;
+	return 0;
+}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 65a4d1ed97bf..c04291517a7e 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -266,7 +266,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 
 /* Assemble the body code between the prologue & epilogue */
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
-		       u32 *addrs)
+		       u32 *addrs, int pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 49e6e0b6e4d2..4170999371ee 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -272,7 +272,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 
 /* Assemble the body code between the prologue & epilogue */
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
-		       u32 *addrs)
+		       u32 *addrs, int pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
@@ -717,12 +717,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		 */
 		/* dst = *(u8 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_B:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
 		/* dst = *(u16 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_H:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_H:
 		/* dst = *(u32 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_W:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
 		/* dst = *(u64 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_DW:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
 			switch (size) {
 			case BPF_B:
 				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
@@ -740,6 +744,13 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 
 			if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
 				addrs[++i] = ctx->idx * 4;
+
+			if (BPF_MODE(code) == BPF_PROBE_MEM) {
+				ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
+							    4, dst_reg);
+				if (ret)
+					return ret;
+			}
 			break;
 
 		/*
-- 
2.31.1


^ permalink raw reply related

* [PATCH v4 4/8] powerpc/ppc-opcode: introduce PPC_RAW_BRANCH() macro
From: Hari Bathini @ 2021-09-29 11:18 UTC (permalink / raw)
  To: naveen.n.rao, christophe.leroy, mpe, ast, daniel
  Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
	yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210929111855.50254-1-hbathini@linux.ibm.com>

Define and use PPC_RAW_BRANCH() macro instead of open coding it. This
macro is used while adding BPF_PROBE_MEM support.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---

* No changes in v4.


 arch/powerpc/include/asm/ppc-opcode.h | 2 ++
 arch/powerpc/net/bpf_jit.h            | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index baea657bc868..f50213e2a3e0 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -566,6 +566,8 @@
 #define PPC_RAW_MTSPR(spr, d)		(0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr))
 #define PPC_RAW_EIEIO()			(0x7c0006ac)
 
+#define PPC_RAW_BRANCH(addr)		(PPC_INST_BRANCH | ((addr) & 0x03fffffc))
+
 /* Deal with instructions that older assemblers aren't aware of */
 #define	PPC_BCCTR_FLUSH		stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
 #define	PPC_CP_ABORT		stringify_in_c(.long PPC_RAW_CP_ABORT)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 411c63d945c7..0c8f885b8f48 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -24,8 +24,8 @@
 #define EMIT(instr)		PLANT_INSTR(image, ctx->idx, instr)
 
 /* Long jump; (unconditional 'branch') */
-#define PPC_JMP(dest)		EMIT(PPC_INST_BRANCH |			      \
-				     (((dest) - (ctx->idx * 4)) & 0x03fffffc))
+#define PPC_JMP(dest)		EMIT(PPC_RAW_BRANCH((dest) - (ctx->idx * 4)))
+
 /* blr; (unconditional 'branch' with link) to absolute address */
 #define PPC_BL_ABS(dest)	EMIT(PPC_INST_BL |			      \
 				     (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc))
-- 
2.31.1


^ permalink raw reply related

* [PATCH v4 1/8] bpf powerpc: Remove unused SEEN_STACK
From: Hari Bathini @ 2021-09-29 11:18 UTC (permalink / raw)
  To: naveen.n.rao, christophe.leroy, mpe, ast, daniel
  Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
	kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210929111855.50254-1-hbathini@linux.ibm.com>

From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>

SEEN_STACK is unused on PowerPC. Remove it. Also, have
SEEN_TAILCALL use 0x40000000.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---

* No changes in v4.


 arch/powerpc/net/bpf_jit.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 99fad093f43e..d6267e93027a 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -116,8 +116,7 @@ static inline bool is_nearbranch(int offset)
 #define COND_LE		(CR0_GT | COND_CMP_FALSE)
 
 #define SEEN_FUNC	0x20000000 /* might call external helpers */
-#define SEEN_STACK	0x40000000 /* uses BPF stack */
-#define SEEN_TAILCALL	0x80000000 /* uses tail calls */
+#define SEEN_TAILCALL	0x40000000 /* uses tail calls */
 
 #define SEEN_VREG_MASK	0x1ff80000 /* Volatile registers r3-r12 */
 #define SEEN_NVREG_MASK	0x0003ffff /* Non volatile registers r14-r31 */
-- 
2.31.1


^ permalink raw reply related

* [PATCH v4 0/8] bpf powerpc: Add BPF_PROBE_MEM support in powerpc JIT compiler
From: Hari Bathini @ 2021-09-29 11:18 UTC (permalink / raw)
  To: naveen.n.rao, christophe.leroy, mpe, ast, daniel
  Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
	yhs, bpf, linuxppc-dev, kafai, Hari Bathini

Patch #1 & #2 are simple cleanup patches. Patch #3 refactors JIT
compiler code with the aim to simplify adding BPF_PROBE_MEM support.
Patch #4 introduces PPC_RAW_BRANCH() macro instead of open coding
branch instruction. Patch #5 & #7 add BPF_PROBE_MEM support for PPC64
& PPC32 JIT compilers respectively. Patch #6 & #8 handle bad userspace
pointers for PPC64 & PPC32 cases respectively.

Changes in v4:
* Addressed all the review comments from Christophe.


Hari Bathini (4):
  bpf powerpc: refactor JIT compiler code
  powerpc/ppc-opcode: introduce PPC_RAW_BRANCH() macro
  bpf ppc32: Add BPF_PROBE_MEM support for JIT
  bpf ppc32: Access only if addr is kernel address

Ravi Bangoria (4):
  bpf powerpc: Remove unused SEEN_STACK
  bpf powerpc: Remove extra_pass from bpf_jit_build_body()
  bpf ppc64: Add BPF_PROBE_MEM support for JIT
  bpf ppc64: Access only if addr is kernel address

 arch/powerpc/include/asm/ppc-opcode.h |   2 +
 arch/powerpc/net/bpf_jit.h            |  19 +++--
 arch/powerpc/net/bpf_jit_comp.c       |  72 ++++++++++++++++--
 arch/powerpc/net/bpf_jit_comp32.c     | 101 ++++++++++++++++++++++----
 arch/powerpc/net/bpf_jit_comp64.c     |  72 ++++++++++++++----
 5 files changed, 224 insertions(+), 42 deletions(-)

-- 
2.31.1


^ permalink raw reply

* [PATCH v4 2/8] bpf powerpc: Remove extra_pass from bpf_jit_build_body()
From: Hari Bathini @ 2021-09-29 11:18 UTC (permalink / raw)
  To: naveen.n.rao, christophe.leroy, mpe, ast, daniel
  Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
	kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210929111855.50254-1-hbathini@linux.ibm.com>

From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>

In case of extra_pass, usual JIT passes are always skipped. So,
extra_pass is always false while calling bpf_jit_build_body() and
can be removed.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---

* No changes in v4.


 arch/powerpc/net/bpf_jit.h        | 2 +-
 arch/powerpc/net/bpf_jit_comp.c   | 6 +++---
 arch/powerpc/net/bpf_jit_comp32.c | 4 ++--
 arch/powerpc/net/bpf_jit_comp64.c | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d6267e93027a..411c63d945c7 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -166,7 +166,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
 
 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
-		       u32 *addrs, bool extra_pass);
+		       u32 *addrs);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 53aefee3fe70..c5c9e8ad1de7 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -149,7 +149,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
 		goto out_addrs;
@@ -162,7 +162,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	 */
 	if (cgctx.seen & SEEN_TAILCALL) {
 		cgctx.idx = 0;
-		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+		if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
 			fp = org_fp;
 			goto out_addrs;
 		}
@@ -210,7 +210,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		/* Now build the prologue, body code & epilogue for real. */
 		cgctx.idx = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
+		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
 		if (bpf_jit_enable > 1)
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index beb12cbc8c29..b60b59426a24 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -266,7 +266,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 
 /* Assemble the body code between the prologue & epilogue */
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
-		       u32 *addrs, bool extra_pass)
+		       u32 *addrs)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
@@ -860,7 +860,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
 
-			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+			ret = bpf_jit_get_func_addr(fp, &insn[i], false,
 						    &func_addr, &func_addr_fixed);
 			if (ret < 0)
 				return ret;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index b87a63dba9c8..2a87da50d9a4 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -272,7 +272,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 
 /* Assemble the body code between the prologue & epilogue */
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
-		       u32 *addrs, bool extra_pass)
+		       u32 *addrs)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
@@ -769,7 +769,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
 
-			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+			ret = bpf_jit_get_func_addr(fp, &insn[i], false,
 						    &func_addr, &func_addr_fixed);
 			if (ret < 0)
 				return ret;
-- 
2.31.1


^ permalink raw reply related

* Re: [PATCH v5 07/11] PCI: Replace pci_dev::driver usage that gets the driver name
From: Simon Horman @ 2021-09-29 10:17 UTC (permalink / raw)
  To: Uwe Kleine-König
  Cc: linux-pci, Alexander Duyck, oss-drivers, Paul Mackerras,
	Christoph Hellwig, Herbert Xu, Rafał Miłecki,
	Jesse Brandeburg, Bjorn Helgaas, Ido Schimmel, Jakub Kicinski,
	Yisen Zhuang, Vadym Kochan, Michael Buesch, Jiri Pirko,
	Salil Mehta, netdev, linux-wireless, linux-kernel, Taras Chornyi,
	Zhou Wang, linux-crypto, kernel, Oliver O'Halloran,
	linuxppc-dev, David S. Miller
In-Reply-To: <20210929085306.2203850-8-u.kleine-koenig@pengutronix.de>

On Wed, Sep 29, 2021 at 10:53:02AM +0200, Uwe Kleine-König wrote:
> struct pci_dev::driver holds (apart from a constant offset) the same
> data as struct pci_dev::dev->driver. With the goal to remove struct
> pci_dev::driver to get rid of data duplication replace getting the
> driver name by dev_driver_string() which implicitly makes use of struct
> pci_dev::dev->driver.
> 
> Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> ---
>  drivers/crypto/hisilicon/qm.c                        | 2 +-
>  drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c   | 2 +-
>  drivers/net/ethernet/marvell/prestera/prestera_pci.c | 2 +-
>  drivers/net/ethernet/mellanox/mlxsw/pci.c            | 2 +-
>  drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 3 ++-
>  5 files changed, 6 insertions(+), 5 deletions(-)

Thanks Uwe.

For NFP:

Acked-by: Simon Horman <simon.horman@corigine.com>


^ permalink raw reply

* [powerpc:merge] BUILD SUCCESS 044c2d99d9f43c6d6fde8bed00672517dd9a5a57
From: kernel test robot @ 2021-09-29 10:04 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge
branch HEAD: 044c2d99d9f43c6d6fde8bed00672517dd9a5a57  Automatic merge of 'fixes' into merge (2021-09-19 22:18)

elapsed time: 730m

configs tested: 118
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
i386                 randconfig-c001-20210929
powerpc                    amigaone_defconfig
powerpc                     ksi8560_defconfig
sh                           se7722_defconfig
arm                            pleb_defconfig
arm                          ep93xx_defconfig
arm                     am200epdkit_defconfig
mips                      maltasmvp_defconfig
powerpc                 mpc837x_rdb_defconfig
mips                     loongson1c_defconfig
mips                         mpc30x_defconfig
riscv             nommu_k210_sdcard_defconfig
powerpc                     tqm8560_defconfig
arm                         hackkit_defconfig
mips                           ci20_defconfig
arm                            mmp2_defconfig
arm                         assabet_defconfig
m68k                            mac_defconfig
sh                           sh2007_defconfig
arm                          ixp4xx_defconfig
sh                          urquell_defconfig
mips                           ip27_defconfig
mips                      fuloong2e_defconfig
sh                          r7780mp_defconfig
m68k                        m5272c3_defconfig
powerpc                 linkstation_defconfig
sh                             shx3_defconfig
microblaze                      mmu_defconfig
x86_64               randconfig-c001-20210929
arm                  randconfig-c002-20210929
x86_64               randconfig-c001-20210928
arm                  randconfig-c002-20210928
i386                 randconfig-c001-20210928
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
s390                             allmodconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a014-20210928
x86_64               randconfig-a011-20210928
x86_64               randconfig-a013-20210928
x86_64               randconfig-a012-20210928
x86_64               randconfig-a015-20210928
x86_64               randconfig-a016-20210928
i386                 randconfig-a014-20210928
i386                 randconfig-a013-20210928
i386                 randconfig-a016-20210928
i386                 randconfig-a011-20210928
i386                 randconfig-a015-20210928
i386                 randconfig-a012-20210928
arc                  randconfig-r043-20210928
riscv                randconfig-r042-20210928
s390                 randconfig-r044-20210928
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                    rhel-8.3-kselftests
um                           x86_64_defconfig
um                             i386_defconfig
x86_64                           allyesconfig
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
powerpc              randconfig-c003-20210929
mips                 randconfig-c004-20210929
arm                  randconfig-c002-20210929
x86_64               randconfig-c007-20210929
riscv                randconfig-c006-20210929
s390                 randconfig-c005-20210929
i386                 randconfig-c001-20210929
x86_64               randconfig-a002-20210928
x86_64               randconfig-a005-20210928
x86_64               randconfig-a001-20210928
x86_64               randconfig-a006-20210928
x86_64               randconfig-a003-20210928
x86_64               randconfig-a004-20210928
i386                 randconfig-a001-20210928
i386                 randconfig-a005-20210928
i386                 randconfig-a002-20210928
i386                 randconfig-a006-20210928
i386                 randconfig-a004-20210928
i386                 randconfig-a003-20210928
hexagon              randconfig-r045-20210928
hexagon              randconfig-r041-20210928

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH v4 4/8] PCI: replace pci_dev::driver usage that gets the driver name
From: Uwe Kleine-König @ 2021-09-29  9:04 UTC (permalink / raw)
  To: Simon Horman
  Cc: linux-pci, Alexander Duyck, oss-drivers, Paul Mackerras,
	Herbert Xu, Ido Schimmel, Rafał Miłecki,
	Jesse Brandeburg, Bjorn Helgaas, Jakub Kicinski, Yisen Zhuang,
	Uwe Kleine-König, Vadym Kochan, Michael Buesch, Jiri Pirko,
	Salil Mehta, netdev, linux-wireless, linux-kernel, Taras Chornyi,
	Zhou Wang, linux-crypto, kernel, Oliver O'Halloran,
	linuxppc-dev, David S. Miller
In-Reply-To: <20210929080541.GA13506@corigine.com>

[-- Attachment #1: Type: text/plain, Size: 3758 bytes --]

Hello Simon,

On Wed, Sep 29, 2021 at 10:05:42AM +0200, Simon Horman wrote:
> On Tue, Sep 28, 2021 at 12:31:29PM +0200, Uwe Kleine-König wrote:
> > On Tue, Sep 28, 2021 at 12:01:28PM +0200, Simon Horman wrote:
> > > On Mon, Sep 27, 2021 at 10:43:22PM +0200, Uwe Kleine-König wrote:
> > > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > > 
> > > > struct pci_dev::driver holds (apart from a constant offset) the same
> > > > data as struct pci_dev::dev->driver. With the goal to remove struct
> > > > pci_dev::driver to get rid of data duplication replace getting the
> > > > driver name by dev_driver_string() which implicitly makes use of struct
> > > > pci_dev::dev->driver.
> > > > 
> > > > Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > 
> > > ...
> > > 
> > > > diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
> > > > index 0685ece1f155..23dfb599c828 100644
> > > > --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
> > > > +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
> > > > @@ -202,7 +202,7 @@ nfp_get_drvinfo(struct nfp_app *app, struct pci_dev *pdev,
> > > >  {
> > > >  	char nsp_version[ETHTOOL_FWVERS_LEN] = {};
> > > >  
> > > > -	strlcpy(drvinfo->driver, pdev->driver->name, sizeof(drvinfo->driver));
> > > > +	strlcpy(drvinfo->driver, dev_driver_string(&pdev->dev), sizeof(drvinfo->driver));
> > > 
> > > I'd slightly prefer to maintain lines under 80 columns wide.
> > > But not nearly strongly enough to engage in a long debate about it.
> > 
> > :-)
> > 
> > Looking at the output of
> > 
> > 	git grep strlcpy.\*sizeof
> > 
> > I wonder if it would be sensible to introduce something like
> > 
> > 	#define strlcpy_array(arr, src) (strlcpy(arr, src, sizeof(arr)) + __must_be_array(arr))
> > 
> > but not sure this is possible without a long debate either (and this
> > line is over 80 chars wide, too :-).
> 
> My main motivation for the 80 char limit in nfp_net_ethtool.c is
> not that I think 80 char is universally a good limit (although that is true),
> but rather that I expect that is the prevailing style in nfp_net_ethtool.c.

I sent out v5 with an additional line break now.
 
> So a macro more than 80 car wide somewhere else is fine by me.
> 
> However, when running checkpatch --strict over the patch it told me:
> 
>     WARNING: Prefer strscpy over strlcpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/
>     #276: FILE: drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c:205:
>     +	strlcpy(drvinfo->driver, dev_driver_string(&pdev->dev), sizeof(drvinfo->driver));
> 
>     total: 0 errors, 1 warnings, 0 checks, 80 lines checked
> 
> (Amusingly, more text wider than 80 column, perhaps suggesting the folly of
>  my original comment, but lets move on from that.)
> 
> As your patch doesn't introduce the usage of strlcpy() I was considering a
> follow-up patch to change it to strscpy(). And in general the email at the
> link above suggests all usages of strlcpy() should do so. So perhaps
> creating strscpy_array is a better idea?

What I read about strscpy() is that conversions for the sake of the
conversion are not welcome. When such a conversion comes from someone
involved with the driver that is also tested this is probably fine.
 
> I have not thought about this much, and probably this just leads us to a
> deeper part of the rabbit hole.

I assume so, too.

Best regards
Uwe

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | https://www.pengutronix.de/ |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* [PATCH v5 10/11] PCI: Replace pci_dev::driver usage by pci_dev::dev.driver
From: Uwe Kleine-König @ 2021-09-29  8:53 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Mark Rutland, Peter Zijlstra, H. Peter Anvin,
	Oliver O'Halloran, Jiri Olsa, Christoph Hellwig,
	Stefano Stabellini, Mathias Nyman, x86, Alexander Shishkin,
	Ingo Molnar, linux-pci, xen-devel, Juergen Gross,
	Andrew Donnellan, Arnd Bergmann, Konrad Rzeszutek Wilk,
	Arnaldo Carvalho de Melo, Borislav Petkov, Bjorn Helgaas,
	Namhyung Kim, Thomas Gleixner, Boris Ostrovsky,
	Greg Kroah-Hartman, linux-usb, linux-perf-users, kernel,
	Frederic Barrat, Paul Mackerras, linuxppc-dev
In-Reply-To: <20210929085306.2203850-1-u.kleine-koenig@pengutronix.de>

struct pci_dev::driver contains (apart from a constant offset) the same
data as struct pci_dev::dev->driver. Replace all remaining users of the
former pointer by the latter to allow removing the former.

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 arch/powerpc/kernel/eeh_driver.c | 10 ++++-----
 arch/x86/events/intel/uncore.c   |  2 +-
 arch/x86/kernel/probe_roms.c     |  2 +-
 drivers/misc/cxl/guest.c         | 24 ++++++++++++---------
 drivers/misc/cxl/pci.c           | 30 ++++++++++++++++----------
 drivers/pci/iov.c                | 25 ++++++++++++++--------
 drivers/pci/pci-driver.c         | 25 +++++++++++-----------
 drivers/pci/pci.c                |  4 ++--
 drivers/pci/pcie/err.c           | 36 ++++++++++++++++++--------------
 drivers/pci/xen-pcifront.c       |  4 ++--
 drivers/usb/host/xhci-pci.c      |  2 +-
 11 files changed, 93 insertions(+), 71 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 3eff6a4888e7..350dab18e137 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -104,13 +104,13 @@ static bool eeh_edev_actionable(struct eeh_dev *edev)
  */
 static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
 {
-	if (!pdev || !pdev->driver)
+	if (!pdev || !pdev->dev.driver)
 		return NULL;
 
-	if (!try_module_get(pdev->driver->driver.owner))
+	if (!try_module_get(pdev->dev.driver->owner))
 		return NULL;
 
-	return pdev->driver;
+	return to_pci_driver(pdev->dev.driver);
 }
 
 /**
@@ -122,10 +122,10 @@ static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
  */
 static inline void eeh_pcid_put(struct pci_dev *pdev)
 {
-	if (!pdev || !pdev->driver)
+	if (!pdev || !pdev->dev.driver)
 		return;
 
-	module_put(pdev->driver->driver.owner);
+	module_put(pdev->dev.driver->owner);
 }
 
 /**
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index c72e368dd164..f1ba6ab2e97e 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1187,7 +1187,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
 	 * PCI slot and func to indicate the uncore box.
 	 */
 	if (id->driver_data & ~0xffff) {
-		struct pci_driver *pci_drv = pdev->driver;
+		struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver);
 
 		pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
 		if (pmu == NULL)
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 9e1def3744f2..36e84d904260 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -80,7 +80,7 @@ static struct resource video_rom_resource = {
  */
 static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device)
 {
-	struct pci_driver *drv = pdev->driver;
+	struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
 	const struct pci_device_id *id;
 
 	if (pdev->vendor == vendor && pdev->device == device)
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 186308f1f8eb..d997c9c3ebb5 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -25,28 +25,32 @@ static void pci_error_handlers(struct cxl_afu *afu,
 		return;
 
 	list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-		if (!afu_dev->driver)
+		struct pci_driver *afu_drv;
+
+		if (!afu_dev->dev.driver)
 			continue;
 
+		afu_drv = to_pci_driver(afu_dev->dev.driver);
+
 		switch (bus_error_event) {
 		case CXL_ERROR_DETECTED_EVENT:
 			afu_dev->error_state = state;
 
-			if (afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->error_detected)
-				afu_dev->driver->err_handler->error_detected(afu_dev, state);
+			if (afu_drv->err_handler &&
+			    afu_drv->err_handler->error_detected)
+				afu_drv->err_handler->error_detected(afu_dev, state);
 		break;
 		case CXL_SLOT_RESET_EVENT:
 			afu_dev->error_state = state;
 
-			if (afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->slot_reset)
-				afu_dev->driver->err_handler->slot_reset(afu_dev);
+			if (afu_drv->err_handler &&
+			    afu_drv->err_handler->slot_reset)
+				afu_drv->err_handler->slot_reset(afu_dev);
 		break;
 		case CXL_RESUME_EVENT:
-			if (afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->resume)
-				afu_dev->driver->err_handler->resume(afu_dev);
+			if (afu_drv->err_handler &&
+			    afu_drv->err_handler->resume)
+				afu_drv->err_handler->resume(afu_dev);
 		break;
 		}
 	}
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 2ba899f5659f..7e7545d01e27 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1805,14 +1805,16 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
 		return result;
 
 	list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-		if (!afu_dev->driver)
+		struct pci_driver *afu_drv;
+		if (!afu_dev->dev.driver)
 			continue;
 
+		afu_drv = to_pci_driver(afu_dev->dev.driver);
+
 		afu_dev->error_state = state;
 
-		if (afu_dev->driver->err_handler)
-			afu_result = afu_dev->driver->err_handler->error_detected(afu_dev,
-										  state);
+		if (afu_drv->err_handler)
+			afu_result = afu_drv->err_handler->error_detected(afu_dev, state);
 		/* Disconnect trumps all, NONE trumps NEED_RESET */
 		if (afu_result == PCI_ERS_RESULT_DISCONNECT)
 			result = PCI_ERS_RESULT_DISCONNECT;
@@ -2003,6 +2005,8 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
 			continue;
 
 		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+			struct pci_driver *afu_drv;
+
 			/* Reset the device context.
 			 * TODO: make this less disruptive
 			 */
@@ -2028,12 +2032,14 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
 			 * shouldn't start new work until we call
 			 * their resume function.
 			 */
-			if (!afu_dev->driver)
+			if (!afu_dev->dev.driver)
 				continue;
 
-			if (afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->slot_reset)
-				afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev);
+			afu_drv = to_pci_driver(afu_dev->dev.driver);
+
+			if (afu_drv->err_handler &&
+			    afu_drv->err_handler->slot_reset)
+				afu_result = afu_drv->err_handler->slot_reset(afu_dev);
 
 			if (afu_result == PCI_ERS_RESULT_DISCONNECT)
 				result = PCI_ERS_RESULT_DISCONNECT;
@@ -2074,9 +2080,11 @@ static void cxl_pci_resume(struct pci_dev *pdev)
 			continue;
 
 		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-			if (afu_dev->driver && afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->resume)
-				afu_dev->driver->err_handler->resume(afu_dev);
+			struct pci_driver *afu_drv;
+			if (afu_dev->dev.driver &&
+			    (afu_drv = to_pci_driver(afu_dev->dev.driver))->err_handler &&
+			    afu_drv->err_handler->resume)
+				afu_drv->err_handler->resume(afu_dev);
 		}
 	}
 	spin_unlock(&adapter->afu_list_lock);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index dafdc652fcd0..f94660927544 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -164,13 +164,15 @@ static ssize_t sriov_vf_total_msix_show(struct device *dev,
 					char *buf)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_driver *pdrv;
 	u32 vf_total_msix = 0;
 
 	device_lock(dev);
-	if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix)
+	pdrv = to_pci_driver(dev->driver);
+	if (!pdrv || !pdrv->sriov_get_vf_total_msix)
 		goto unlock;
 
-	vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev);
+	vf_total_msix = pdrv->sriov_get_vf_total_msix(pdev);
 unlock:
 	device_unlock(dev);
 	return sysfs_emit(buf, "%u\n", vf_total_msix);
@@ -183,6 +185,7 @@ static ssize_t sriov_vf_msix_count_store(struct device *dev,
 {
 	struct pci_dev *vf_dev = to_pci_dev(dev);
 	struct pci_dev *pdev = pci_physfn(vf_dev);
+	struct pci_driver *pdrv;
 	int val, ret;
 
 	ret = kstrtoint(buf, 0, &val);
@@ -193,13 +196,14 @@ static ssize_t sriov_vf_msix_count_store(struct device *dev,
 		return -EINVAL;
 
 	device_lock(&pdev->dev);
-	if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) {
+	pdrv = to_pci_driver(pdev->dev.driver);
+	if (!pdrv || !pdrv->sriov_set_msix_vec_count) {
 		ret = -EOPNOTSUPP;
 		goto err_pdev;
 	}
 
 	device_lock(&vf_dev->dev);
-	if (vf_dev->driver) {
+	if (vf_dev->dev.driver) {
 		/*
 		 * A driver is already attached to this VF and has configured
 		 * itself based on the current MSI-X vector count. Changing
@@ -209,7 +213,7 @@ static ssize_t sriov_vf_msix_count_store(struct device *dev,
 		goto err_dev;
 	}
 
-	ret = pdev->driver->sriov_set_msix_vec_count(vf_dev, val);
+	ret = pdrv->sriov_set_msix_vec_count(vf_dev, val);
 
 err_dev:
 	device_unlock(&vf_dev->dev);
@@ -376,6 +380,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
 				  const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_driver *pdrv;
 	int ret;
 	u16 num_vfs;
 
@@ -392,14 +397,16 @@ static ssize_t sriov_numvfs_store(struct device *dev,
 		goto exit;
 
 	/* is PF driver loaded */
-	if (!pdev->driver) {
+	if (!pdev->dev.driver) {
 		pci_info(pdev, "no driver bound to device; cannot configure SR-IOV\n");
 		ret = -ENOENT;
 		goto exit;
 	}
 
+	pdrv = to_pci_driver(pdev->dev.driver);
+
 	/* is PF driver loaded w/callback */
-	if (!pdev->driver->sriov_configure) {
+	if (!pdrv->sriov_configure) {
 		pci_info(pdev, "driver does not support SR-IOV configuration via sysfs\n");
 		ret = -ENOENT;
 		goto exit;
@@ -407,7 +414,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
 
 	if (num_vfs == 0) {
 		/* disable VFs */
-		ret = pdev->driver->sriov_configure(pdev, 0);
+		ret = pdrv->sriov_configure(pdev, 0);
 		goto exit;
 	}
 
@@ -419,7 +426,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
 		goto exit;
 	}
 
-	ret = pdev->driver->sriov_configure(pdev, num_vfs);
+	ret = pdrv->sriov_configure(pdev, num_vfs);
 	if (ret < 0)
 		goto exit;
 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 50449ec622a3..4d20022b8631 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -457,7 +457,7 @@ static int pci_device_probe(struct device *dev)
 static void pci_device_remove(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
 
 	if (drv->remove) {
 		pm_runtime_get_sync(dev);
@@ -493,7 +493,7 @@ static void pci_device_remove(struct device *dev)
 static void pci_device_shutdown(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
 
 	pm_runtime_resume(dev);
 
@@ -589,7 +589,7 @@ static int pci_pm_reenable_device(struct pci_dev *pci_dev)
 static int pci_legacy_suspend(struct device *dev, pm_message_t state)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(dev->driver);
 
 	if (drv && drv->suspend) {
 		pci_power_t prev = pci_dev->current_state;
@@ -630,7 +630,7 @@ static int pci_legacy_suspend_late(struct device *dev, pm_message_t state)
 static int pci_legacy_resume(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
 
 	pci_fixup_device(pci_fixup_resume, pci_dev);
 
@@ -649,7 +649,7 @@ static void pci_pm_default_suspend(struct pci_dev *pci_dev)
 
 static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev)
 {
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
 	bool ret = drv && (drv->suspend || drv->resume);
 
 	/*
@@ -1242,11 +1242,11 @@ static int pci_pm_runtime_suspend(struct device *dev)
 	int error;
 
 	/*
-	 * If pci_dev->driver is not set (unbound), we leave the device in D0,
+	 * If pci_dev->dev.driver is not set (unbound), we leave the device in D0,
 	 * but it may go to D3cold when the bridge above it runtime suspends.
 	 * Save its config space in case that happens.
 	 */
-	if (!pci_dev->driver) {
+	if (!pci_dev->dev.driver) {
 		pci_save_state(pci_dev);
 		return 0;
 	}
@@ -1303,7 +1303,7 @@ static int pci_pm_runtime_resume(struct device *dev)
 	 */
 	pci_restore_standard_config(pci_dev);
 
-	if (!pci_dev->driver)
+	if (!dev->driver)
 		return 0;
 
 	pci_fixup_device(pci_fixup_resume_early, pci_dev);
@@ -1322,14 +1322,13 @@ static int pci_pm_runtime_resume(struct device *dev)
 
 static int pci_pm_runtime_idle(struct device *dev)
 {
-	struct pci_dev *pci_dev = to_pci_dev(dev);
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 
 	/*
-	 * If pci_dev->driver is not set (unbound), the device should
+	 * If dev->driver is not set (unbound), the device should
 	 * always remain in D0 regardless of the runtime PM status
 	 */
-	if (!pci_dev->driver)
+	if (!dev->driver)
 		return 0;
 
 	if (!pm)
@@ -1436,8 +1435,8 @@ static struct pci_driver pci_compat_driver = {
  */
 struct pci_driver *pci_dev_driver(const struct pci_dev *dev)
 {
-	if (dev->driver)
-		return dev->driver;
+	if (dev->dev.driver)
+		return to_pci_driver(dev->dev.driver);
 	else {
 		int i;
 		for (i = 0; i <= PCI_ROM_RESOURCE; i++)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ce2ab62b64cf..ccecf740de59 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5089,7 +5089,7 @@ EXPORT_SYMBOL_GPL(pci_dev_unlock);
 static void pci_dev_save_and_disable(struct pci_dev *dev)
 {
 	const struct pci_error_handlers *err_handler =
-			dev->driver ? dev->driver->err_handler : NULL;
+			dev->dev.driver ? to_pci_driver(dev->dev.driver)->err_handler : NULL;
 
 	/*
 	 * dev->driver->err_handler->reset_prepare() is protected against
@@ -5120,7 +5120,7 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
 static void pci_dev_restore(struct pci_dev *dev)
 {
 	const struct pci_error_handlers *err_handler =
-			dev->driver ? dev->driver->err_handler : NULL;
+			dev->dev.driver ? to_pci_driver(dev->dev.driver)->err_handler : NULL;
 
 	pci_restore_state(dev);
 
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index b576aa890c76..b314b54f7821 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -49,14 +49,15 @@ static int report_error_detected(struct pci_dev *dev,
 				 pci_channel_state_t state,
 				 enum pci_ers_result *result)
 {
+	struct pci_driver *pdrv;
 	pci_ers_result_t vote;
 	const struct pci_error_handlers *err_handler;
 
 	device_lock(&dev->dev);
 	if (!pci_dev_set_io_state(dev, state) ||
-		!dev->driver ||
-		!dev->driver->err_handler ||
-		!dev->driver->err_handler->error_detected) {
+		!dev->dev.driver ||
+		!(pdrv = to_pci_driver(dev->dev.driver))->err_handler ||
+		!pdrv->err_handler->error_detected) {
 		/*
 		 * If any device in the subtree does not have an error_detected
 		 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
@@ -70,7 +71,7 @@ static int report_error_detected(struct pci_dev *dev,
 			vote = PCI_ERS_RESULT_NONE;
 		}
 	} else {
-		err_handler = dev->driver->err_handler;
+		err_handler = pdrv->err_handler;
 		vote = err_handler->error_detected(dev, state);
 	}
 	pci_uevent_ers(dev, vote);
@@ -92,15 +93,16 @@ static int report_normal_detected(struct pci_dev *dev, void *data)
 static int report_mmio_enabled(struct pci_dev *dev, void *data)
 {
 	pci_ers_result_t vote, *result = data;
+	struct pci_driver *pdrv;
 	const struct pci_error_handlers *err_handler;
 
 	device_lock(&dev->dev);
-	if (!dev->driver ||
-		!dev->driver->err_handler ||
-		!dev->driver->err_handler->mmio_enabled)
+	if (!dev->dev.driver ||
+		!(pdrv = to_pci_driver(dev->dev.driver))->err_handler ||
+		!pdrv->err_handler->mmio_enabled)
 		goto out;
 
-	err_handler = dev->driver->err_handler;
+	err_handler = pdrv->err_handler;
 	vote = err_handler->mmio_enabled(dev);
 	*result = merge_result(*result, vote);
 out:
@@ -112,14 +114,15 @@ static int report_slot_reset(struct pci_dev *dev, void *data)
 {
 	pci_ers_result_t vote, *result = data;
 	const struct pci_error_handlers *err_handler;
+	struct pci_driver *pdrv;
 
 	device_lock(&dev->dev);
-	if (!dev->driver ||
-		!dev->driver->err_handler ||
-		!dev->driver->err_handler->slot_reset)
+	if (!dev->dev.driver ||
+		!(pdrv = to_pci_driver(dev->dev.driver))->err_handler ||
+		!pdrv->err_handler->slot_reset)
 		goto out;
 
-	err_handler = dev->driver->err_handler;
+	err_handler = pdrv->err_handler;
 	vote = err_handler->slot_reset(dev);
 	*result = merge_result(*result, vote);
 out:
@@ -130,15 +133,16 @@ static int report_slot_reset(struct pci_dev *dev, void *data)
 static int report_resume(struct pci_dev *dev, void *data)
 {
 	const struct pci_error_handlers *err_handler;
+	struct pci_driver *pdrv;
 
 	device_lock(&dev->dev);
 	if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
-		!dev->driver ||
-		!dev->driver->err_handler ||
-		!dev->driver->err_handler->resume)
+		!dev->dev.driver ||
+		!(pdrv = to_pci_driver(dev->dev.driver))->err_handler ||
+		!pdrv->err_handler->resume)
 		goto out;
 
-	err_handler = dev->driver->err_handler;
+	err_handler = pdrv->err_handler;
 	err_handler->resume(dev);
 out:
 	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index f2d7f70a7a10..73831fb87a1e 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -601,12 +601,12 @@ static pci_ers_result_t pcifront_common_process(int cmd,
 	result = PCI_ERS_RESULT_NONE;
 
 	pcidev = pci_get_domain_bus_and_slot(domain, bus, devfn);
-	if (!pcidev || !pcidev->driver) {
+	if (!pcidev || !pcidev->dev.driver) {
 		dev_err(&pdev->xdev->dev, "device or AER driver is NULL\n");
 		pci_dev_put(pcidev);
 		return result;
 	}
-	pdrv = pcidev->driver;
+	pdrv = to_pci_driver(pcidev->dev.driver);
 
 	if (pdrv->err_handler && pdrv->err_handler->error_detected) {
 		pci_dbg(pcidev, "trying to call AER service\n");
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 2c9f25ca8edd..2f4729f4f1e0 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -103,7 +103,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	struct xhci_driver_data         *driver_data;
 	const struct pci_device_id      *id;
 
-	id = pci_match_id(pdev->driver->id_table, pdev);
+	id = pci_match_id(to_pci_driver(pdev->dev.driver)->id_table, pdev);
 
 	if (id && id->driver_data) {
 		driver_data = (struct xhci_driver_data *)id->driver_data;
-- 
2.30.2


^ permalink raw reply related

* [PATCH v5 07/11] PCI: Replace pci_dev::driver usage that gets the driver name
From: Uwe Kleine-König @ 2021-09-29  8:53 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: linux-pci, Alexander Duyck, oss-drivers, Paul Mackerras,
	Christoph Hellwig, Herbert Xu, Rafał Miłecki,
	Jesse Brandeburg, Ido Schimmel, Jakub Kicinski, Yisen Zhuang,
	Vadym Kochan, Michael Buesch, Jiri Pirko, Salil Mehta, netdev,
	linux-wireless, linux-kernel, Taras Chornyi, Zhou Wang,
	linux-crypto, kernel, Simon Horman, Oliver O'Halloran,
	linuxppc-dev, David S. Miller
In-Reply-To: <20210929085306.2203850-1-u.kleine-koenig@pengutronix.de>

struct pci_dev::driver holds (apart from a constant offset) the same
data as struct pci_dev::dev->driver. With the goal to remove struct
pci_dev::driver to get rid of data duplication replace getting the
driver name by dev_driver_string() which implicitly makes use of struct
pci_dev::dev->driver.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 drivers/crypto/hisilicon/qm.c                        | 2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c   | 2 +-
 drivers/net/ethernet/marvell/prestera/prestera_pci.c | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/pci.c            | 2 +-
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 3 ++-
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 369562d34d66..8f361e54e524 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3085,7 +3085,7 @@ static int qm_alloc_uacce(struct hisi_qm *qm)
 	};
 	int ret;
 
-	ret = strscpy(interface.name, pdev->driver->name,
+	ret = strscpy(interface.name, dev_driver_string(&pdev->dev),
 		      sizeof(interface.name));
 	if (ret < 0)
 		return -ENAMETOOLONG;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 7ea511d59e91..f279edfce3f1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -606,7 +606,7 @@ static void hns3_get_drvinfo(struct net_device *netdev,
 		return;
 	}
 
-	strncpy(drvinfo->driver, h->pdev->driver->name,
+	strncpy(drvinfo->driver, dev_driver_string(&h->pdev->dev),
 		sizeof(drvinfo->driver));
 	drvinfo->driver[sizeof(drvinfo->driver) - 1] = '\0';
 
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index a250d394da38..a8f007f6dad2 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -720,7 +720,7 @@ static int prestera_fw_load(struct prestera_fw *fw)
 static int prestera_pci_probe(struct pci_dev *pdev,
 			      const struct pci_device_id *id)
 {
-	const char *driver_name = pdev->driver->name;
+	const char *driver_name = dev_driver_string(&pdev->dev);
 	struct prestera_fw *fw;
 	int err;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 13b0259f7ea6..8f306364f7bf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1876,7 +1876,7 @@ static void mlxsw_pci_cmd_fini(struct mlxsw_pci *mlxsw_pci)
 
 static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	const char *driver_name = pdev->driver->name;
+	const char *driver_name = dev_driver_string(&pdev->dev);
 	struct mlxsw_pci *mlxsw_pci;
 	int err;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 0685ece1f155..1de076f55740 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -202,7 +202,8 @@ nfp_get_drvinfo(struct nfp_app *app, struct pci_dev *pdev,
 {
 	char nsp_version[ETHTOOL_FWVERS_LEN] = {};
 
-	strlcpy(drvinfo->driver, pdev->driver->name, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->driver, dev_driver_string(&pdev->dev),
+		sizeof(drvinfo->driver));
 	nfp_net_get_nspinfo(app, nsp_version);
 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
 		 "%s %s %s %s", vnic_version, nsp_version,
-- 
2.30.2


^ permalink raw reply related

* [PATCH v5 05/11] powerpc/eeh: Don't use driver member of struct pci_dev and further cleanups
From: Uwe Kleine-König @ 2021-09-29  8:53 UTC (permalink / raw)
  To: Bjorn Helgaas, Michael Ellerman
  Cc: linux-pci, Paul Mackerras, kernel, Oliver O'Halloran,
	linuxppc-dev, Christoph Hellwig
In-Reply-To: <20210929085306.2203850-1-u.kleine-koenig@pengutronix.de>

The driver member of struct pci_dev is to be removed as it tracks
information already present by tracking of the driver core. So replace
pdev->driver->name by dev_driver_string() for the corresponding struct
device.

Also move the function nearer to its only user and instead of the ?:
operator use a normal if which is more readable.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 arch/powerpc/include/asm/ppc-pci.h | 5 -----
 arch/powerpc/kernel/eeh.c          | 8 ++++++++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 2b9edbf6e929..f6cf0159024e 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -55,11 +55,6 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
 void eeh_sysfs_add_device(struct pci_dev *pdev);
 void eeh_sysfs_remove_device(struct pci_dev *pdev);
 
-static inline const char *eeh_driver_name(struct pci_dev *pdev)
-{
-	return (pdev && pdev->driver) ? pdev->driver->name : "<null>";
-}
-
 #endif /* CONFIG_EEH */
 
 #define PCI_BUSNO(bdfn) ((bdfn >> 8) & 0xff)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index e9b597ed423c..4b08881c4a1e 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -399,6 +399,14 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
 	return ret;
 }
 
+static inline const char *eeh_driver_name(struct pci_dev *pdev)
+{
+	if (pdev)
+		return dev_driver_string(&pdev->dev);
+
+	return "<null>";
+}
+
 /**
  * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
  * @edev: eeh device
-- 
2.30.2


^ permalink raw reply related

* Re: [PATCH kernel] powerpc/dma: Fix dma_map_ops::get_required_mask
From: Alexey Kardashevskiy @ 2021-09-29  8:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Carol Soto, linuxppc-dev, Cédric Le Goater, Frederic Barrat
In-Reply-To: <20200909075849.GA12282@lst.de>



On 09/09/2020 17:58, Christoph Hellwig wrote:
> On Tue, Sep 08, 2020 at 11:10:03PM +1000, Alexey Kardashevskiy wrote:
>>>> a-ha, this makes more sense, thanks. Then I guess we need to revert that
>>>> one bit from yours f1565c24b596, do not we?
>>>
>>> Why?  The was the original intent of the API, but now we also use
>>> internally to check the addressing capabilities.
>>
>> The bigger mask the better, no? As it is now, it's limited by the window 
>> size which happens to be bigger than 4GB but smaller then full 64bit (48bit 
>> on my system)
> 
> Yes, the bigger mask is better.  But I don't see why you'd want to
> revert the dma bypass code for that entirely.


Ok we have another victim of this change:

https://github.com/torvalds/linux/blob/master/drivers/scsi/mpt3sas/mpt3sas_base.c#L3007


It calls dma_get_required_mask() to know "the mask that the platform
requires to operate efficiently" (from dma-api.rst). The current
upstream returns 31 for pseries which in no way is efficient, we can do
better so we need this hunk back (but just this one):
https://github.com/torvalds/linux/commit/f1565c24b5965dfd2352f209c417ff160be04db9#diff-18e87e1863bf902c6388d72ad99467b7fcec0dd37084636d96ad5a35a3e59904L156
(well, almost, move it above the !tbl check).

This does not hit us on powernv/upstream as that returns 44 (or so) and
the mpt3sas driver (which does the right thing afaict) just assumes that
">32" == ">=63". What do I miss here? Thanks,

ps:
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20200908015106.79661-1-aik@ozlabs.ru/#2528801
is the rest of the thread I am replying to.


-- 
Alexey

^ permalink raw reply

* Re: [PATCH v4 4/8] PCI: replace pci_dev::driver usage that gets the driver name
From: Simon Horman @ 2021-09-29  8:05 UTC (permalink / raw)
  To: Uwe Kleine-König
  Cc: Alexander Duyck, oss-drivers, Paul Mackerras, Herbert Xu,
	Ido Schimmel, Rafał Miłecki, Jesse Brandeburg,
	Bjorn Helgaas, linux-pci, Jakub Kicinski, Yisen Zhuang,
	Uwe Kleine-König, Vadym Kochan, Michael Buesch, Jiri Pirko,
	Salil Mehta, netdev, linux-wireless, linux-kernel, Taras Chornyi,
	Zhou Wang, linux-crypto, kernel, Oliver O'Halloran,
	linuxppc-dev, David S. Miller
In-Reply-To: <20210928103129.c3gcbnfbarezr3mm@pengutronix.de>

On Tue, Sep 28, 2021 at 12:31:29PM +0200, Uwe Kleine-König wrote:
> On Tue, Sep 28, 2021 at 12:01:28PM +0200, Simon Horman wrote:
> > On Mon, Sep 27, 2021 at 10:43:22PM +0200, Uwe Kleine-König wrote:
> > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > 
> > > struct pci_dev::driver holds (apart from a constant offset) the same
> > > data as struct pci_dev::dev->driver. With the goal to remove struct
> > > pci_dev::driver to get rid of data duplication replace getting the
> > > driver name by dev_driver_string() which implicitly makes use of struct
> > > pci_dev::dev->driver.
> > > 
> > > Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > 
> > ...
> > 
> > > diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
> > > index 0685ece1f155..23dfb599c828 100644
> > > --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
> > > +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
> > > @@ -202,7 +202,7 @@ nfp_get_drvinfo(struct nfp_app *app, struct pci_dev *pdev,
> > >  {
> > >  	char nsp_version[ETHTOOL_FWVERS_LEN] = {};
> > >  
> > > -	strlcpy(drvinfo->driver, pdev->driver->name, sizeof(drvinfo->driver));
> > > +	strlcpy(drvinfo->driver, dev_driver_string(&pdev->dev), sizeof(drvinfo->driver));
> > 
> > I'd slightly prefer to maintain lines under 80 columns wide.
> > But not nearly strongly enough to engage in a long debate about it.
> 
> :-)
> 
> Looking at the output of
> 
> 	git grep strlcpy.\*sizeof
> 
> I wonder if it would be sensible to introduce something like
> 
> 	#define strlcpy_array(arr, src) (strlcpy(arr, src, sizeof(arr)) + __must_be_array(arr))
> 
> but not sure this is possible without a long debate either (and this
> line is over 80 chars wide, too :-).

My main motivation for the 80 char limit in nfp_net_ethtool.c is
not that I think 80 char is universally a good limit (although that is true),
but rather that I expect that is the prevailing style in nfp_net_ethtool.c.

So a macro more than 80 car wide somewhere else is fine by me.

However, when running checkpatch --strict over the patch it told me:

    WARNING: Prefer strscpy over strlcpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/
    #276: FILE: drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c:205:
    +	strlcpy(drvinfo->driver, dev_driver_string(&pdev->dev), sizeof(drvinfo->driver));

    total: 0 errors, 1 warnings, 0 checks, 80 lines checked

(Amusingly, more text wider than 80 column, perhaps suggesting the folly of
 my original comment, but lets move on from that.)

As your patch doesn't introduce the usage of strlcpy() I was considering a
follow-up patch to change it to strscpy(). And in general the email at the
link above suggests all usages of strlcpy() should do so. So perhaps
creating strscpy_array is a better idea?

I have not thought about this much, and probably this just leads us to a
deeper part of the rabbit hole.

^ permalink raw reply

* Re: [PATCH v2 2/2] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()
From: Srikar Dronamraju @ 2021-09-29  7:30 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: linuxppc-dev, npiggin
In-Reply-To: <20210928214147.312412-3-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2021-09-28 16:41:47]:

> vcpu_is_preempted() can be used outside of preempt-disabled critical
> sections, yielding warnings such as:
> 
> BUG: using smp_processor_id() in preemptible [00000000] code: systemd-udevd/185
> caller is rwsem_spin_on_owner+0x1cc/0x2d0
> CPU: 1 PID: 185 Comm: systemd-udevd Not tainted 5.15.0-rc2+ #33
> Call Trace:
> [c000000012907ac0] [c000000000aa30a8] dump_stack_lvl+0xac/0x108 (unreliable)
> [c000000012907b00] [c000000001371f70] check_preemption_disabled+0x150/0x160
> [c000000012907b90] [c0000000001e0e8c] rwsem_spin_on_owner+0x1cc/0x2d0
> [c000000012907be0] [c0000000001e1408] rwsem_down_write_slowpath+0x478/0x9a0
> [c000000012907ca0] [c000000000576cf4] filename_create+0x94/0x1e0
> [c000000012907d10] [c00000000057ac08] do_symlinkat+0x68/0x1a0
> [c000000012907d70] [c00000000057ae18] sys_symlink+0x58/0x70
> [c000000012907da0] [c00000000002e448] system_call_exception+0x198/0x3c0
> [c000000012907e10] [c00000000000c54c] system_call_common+0xec/0x250
> 
> The result of vcpu_is_preempted() is always used speculatively, and the
> function does not access per-cpu resources in a (Linux) preempt-unsafe way.
> Use raw_smp_processor_id() to avoid such warnings, adding explanatory
> comments.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> Fixes: ca3f969dcb11 ("powerpc/paravirt: Use is_kvm_guest() in vcpu_is_preempted()")

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>

> ---
>  arch/powerpc/include/asm/paravirt.h | 18 +++++++++++++++++-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
> index 39f173961f6a..eb7df559ae74 100644
> --- a/arch/powerpc/include/asm/paravirt.h
> +++ b/arch/powerpc/include/asm/paravirt.h
> @@ -110,7 +110,23 @@ static inline bool vcpu_is_preempted(int cpu)
> 
>  #ifdef CONFIG_PPC_SPLPAR
>  	if (!is_kvm_guest()) {
> -		int first_cpu = cpu_first_thread_sibling(smp_processor_id());
> +		int first_cpu;
> +
> +		/*
> +		 * The result of vcpu_is_preempted() is used in a
> +		 * speculative way, and is always subject to invalidation
> +		 * by events internal and external to Linux. While we can
> +		 * be called in preemptable context (in the Linux sense),
> +		 * we're not accessing per-cpu resources in a way that can
> +		 * race destructively with Linux scheduler preemption and
> +		 * migration, and callers can tolerate the potential for
> +		 * error introduced by sampling the CPU index without
> +		 * pinning the task to it. So it is permissible to use
> +		 * raw_smp_processor_id() here to defeat the preempt debug
> +		 * warnings that can arise from using smp_processor_id()
> +		 * in arbitrary contexts.
> +		 */
> +		first_cpu = cpu_first_thread_sibling(raw_smp_processor_id());
> 
>  		/*
>  		 * The PowerVM hypervisor dispatches VMs on a whole core
> -- 
> 2.31.1
> 

-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH v2 1/2] powerpc/paravirt: vcpu_is_preempted() commentary
From: Srikar Dronamraju @ 2021-09-29  7:30 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: linuxppc-dev, npiggin
In-Reply-To: <20210928214147.312412-2-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2021-09-28 16:41:46]:

> Add comments more clearly documenting that this function determines whether
> hypervisor-level preemption of the VM has occurred.
> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>

Looks good to me.

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>

> ---
>  arch/powerpc/include/asm/paravirt.h | 22 ++++++++++++++++++----
>  1 file changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
> index bcb7b5f917be..39f173961f6a 100644
> --- a/arch/powerpc/include/asm/paravirt.h
> +++ b/arch/powerpc/include/asm/paravirt.h
> @@ -21,7 +21,7 @@ static inline bool is_shared_processor(void)
>  	return static_branch_unlikely(&shared_processor);
>  }
> 
> -/* If bit 0 is set, the cpu has been preempted */
> +/* If bit 0 is set, the cpu has been ceded, conferred, or preempted */
>  static inline u32 yield_count_of(int cpu)
>  {
>  	__be32 yield_count = READ_ONCE(lppaca_of(cpu).yield_count);
> @@ -92,6 +92,19 @@ static inline void prod_cpu(int cpu)
>  #define vcpu_is_preempted vcpu_is_preempted
>  static inline bool vcpu_is_preempted(int cpu)
>  {
> +	/*
> +	 * The dispatch/yield bit alone is an imperfect indicator of
> +	 * whether the hypervisor has dispatched @cpu to run on a physical
> +	 * processor. When it is clear, @cpu is definitely not preempted.
> +	 * But when it is set, it means only that it *might* be, subject to
> +	 * other conditions. So we check other properties of the VM and
> +	 * @cpu first, resorting to the yield count last.
> +	 */
> +
> +	/*
> +	 * Hypervisor preemption isn't possible in dedicated processor
> +	 * mode by definition.
> +	 */
>  	if (!is_shared_processor())
>  		return false;
> 
> @@ -100,9 +113,10 @@ static inline bool vcpu_is_preempted(int cpu)
>  		int first_cpu = cpu_first_thread_sibling(smp_processor_id());
> 
>  		/*
> -		 * Preemption can only happen at core granularity. This CPU
> -		 * is not preempted if one of the CPU of this core is not
> -		 * preempted.
> +		 * The PowerVM hypervisor dispatches VMs on a whole core
> +		 * basis. So we know that a thread sibling of the local CPU
> +		 * cannot have been preempted by the hypervisor, even if it
> +		 * has called H_CONFER, which will set the yield bit.
>  		 */
>  		if (cpu_first_thread_sibling(cpu) == first_cpu)
>  			return false;
> -- 
> 2.31.1
> 

-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH] powerpc: fix unbalanced node refcount in check_kvm_guest()
From: Srikar Dronamraju @ 2021-09-29  7:29 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: linuxppc-dev, npiggin
In-Reply-To: <20210928124550.132020-1-nathanl@linux.ibm.com>

* Nathan Lynch <nathanl@linux.ibm.com> [2021-09-28 07:45:50]:

> When check_kvm_guest() succeeds in looking up a /hypervisor OF node, it
> returns without performing a matching put for the lookup, leaving the
> node's reference count elevated.
> 
> Add the necessary call to of_node_put(), rearranging the code slightly to
> avoid repetition or goto.
> 

Looks good to me.

I do see few other cases where we call of_find_node calls without
of_node_put().

Some of them that I saw were in

find_legacy_serial_ports()  in  arch/powerpc/kernel/legacy_serial.c
proc_ppc64_create in arch/powerpc/proc/powerpc.c
update_events_in_group in arch/powerpc/perf/imc-pmu.c
cell_iommu_init_disabled in arch/powerpc/platforms/cell/iommu.c
cell_publish_devices in arch/powerpc/platforms/cell/setup.c
spufs_init_isolated_loader in arch/powerpc/platforms/cell/spufs/inode.c
holly_init_pci / holly_restart and holly_init_IRQ in arch/powerpc/platforms/embedded6xx/holly.c

Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>

> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> Fixes: 107c55005fbd ("powerpc/pseries: Add KVM guest doorbell restrictions")
> ---
>  arch/powerpc/kernel/firmware.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
> index c7022c41cc31..20328f72f9f2 100644
> --- a/arch/powerpc/kernel/firmware.c
> +++ b/arch/powerpc/kernel/firmware.c
> @@ -31,11 +31,10 @@ int __init check_kvm_guest(void)
>  	if (!hyper_node)
>  		return 0;
> 
> -	if (!of_device_is_compatible(hyper_node, "linux,kvm"))
> -		return 0;
> -
> -	static_branch_enable(&kvm_guest);
> +	if (of_device_is_compatible(hyper_node, "linux,kvm"))
> +		static_branch_enable(&kvm_guest);
> 
> +	of_node_put(hyper_node);
>  	return 0;
>  }
>  core_initcall(check_kvm_guest); // before kvm_guest_init()
> -- 
> 2.31.1
> 

-- 
Thanks and Regards
Srikar Dronamraju

^ permalink raw reply

* Re: [PATCH kernel] powerps/pseries/dma: Add support for 2M IOMMU page size
From: Frederic Barrat @ 2021-09-29  7:00 UTC (permalink / raw)
  To: Alexey Kardashevskiy, linuxppc-dev
  Cc: Leonardo Augusto Guimaraes Garcia, Leonardo Bras, Travis Pizel,
	Brian J King, Murilo Vicentini
In-Reply-To: <20210928101521.3956331-1-aik@ozlabs.ru>



On 28/09/2021 12:15, Alexey Kardashevskiy wrote:
> The upcoming PAPR spec adds a 2M page size, bit 23 right after the 16G page
> size in the "ibm,query-pe-dma-window" call.
> 
> This adds support for the new page size. Since the new page size is out
> of sorted order, this changes the loop to not assume that shift[] is
> sorted.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---


Looks ok to me
Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>


> 
> This might not work if PHYP keeps rejecting new window requests for less
> than 32768 TCEs. This is needed:
> https://github.com/aik/linux/commit/8cc8fa5ba5b3b4a18efbc9d81d9e5b85ca7c8a95
> 
> 
> ---
>   arch/powerpc/platforms/pseries/iommu.c | 10 +++++-----
>   1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> index c741689a5165..237bf405b0cb 100644
> --- a/arch/powerpc/platforms/pseries/iommu.c
> +++ b/arch/powerpc/platforms/pseries/iommu.c
> @@ -1159,14 +1159,15 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
>   /* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */
>   static int iommu_get_page_shift(u32 query_page_size)
>   {
> -	/* Supported IO page-sizes according to LoPAR */
> +	/* Supported IO page-sizes according to LoPAR, note that 2M is out of order */
>   	const int shift[] = {
>   		__builtin_ctzll(SZ_4K),   __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M),
>   		__builtin_ctzll(SZ_32M),  __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M),
> -		__builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G)
> +		__builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G), __builtin_ctzll(SZ_2M)
>   	};
>   
>   	int i = ARRAY_SIZE(shift) - 1;
> +	int ret = 0;
>   
>   	/*
>   	 * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field:
> @@ -1176,11 +1177,10 @@ static int iommu_get_page_shift(u32 query_page_size)
>   	 */
>   	for (; i >= 0 ; i--) {
>   		if (query_page_size & (1 << i))
> -			return shift[i];
> +			ret = max(ret, shift[i]);
>   	}
>   
> -	/* No valid page size found. */
> -	return 0;
> +	return ret;
>   }
>   
>   static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr,
> 

^ permalink raw reply

* Re: [next-20210827][ppc][multipathd] INFO: task hung in dm_table_add_target
From: Christoph Hellwig @ 2021-09-29  6:52 UTC (permalink / raw)
  To: Abdul Haleem
  Cc: axboe, sachinp, jack, linux-scsi, linux-kernel, dm-devel,
	linux-next, dougmill, Brian King, linuxppc-dev, Christoph Hellwig
In-Reply-To: <86f8e47b-84ca-4cc3-5d5b-f5f11c1d4d1a@linux.vnet.ibm.com>

On Tue, Sep 28, 2021 at 03:53:47PM +0530, Abdul Haleem wrote:
>
> On 9/1/21 7:06 PM, Christoph Hellwig wrote:
>> On Wed, Sep 01, 2021 at 04:47:26PM +0530, Abdul Haleem wrote:
>>> Greeting's
>>>
>>> multiple task hung while adding the vfc disk back to the multipath on my
>>> powerpc box running linux-next kernel
>> Can you retry to reproduce this with lockdep enabled to see if there
>> is anything interesting holding this lock?
>
> LOCKDEP was earlier enabled by default
>
> # cat .config | grep LOCKDEP
> CONFIG_LOCKDEP_SUPPORT=y
>
> BTW, Recreated again on 5.15.0-rc2 mainline kernel and attaching the logs

It seems the reinstate is blocking on the close which is blocking on
flushing dirty data.  In other words it looks like the link blocking
looks like the symptom and not the cause.

^ permalink raw reply

* Re: [PATCH v2 4/4] powerpc/pseries/cpuhp: remove obsolete comment from pseries_cpu_die
From: Michael Ellerman @ 2021-09-29  5:45 UTC (permalink / raw)
  To: Daniel Henrique Barboza, Nathan Lynch, linuxppc-dev
  Cc: tyreld, ldufour, aneesh.kumar
In-Reply-To: <4479e869-1c98-4473-262c-3aeb37b8fca2@gmail.com>

Daniel Henrique Barboza <danielhb413@gmail.com> writes:
> On 9/27/21 17:19, Nathan Lynch wrote:
>> This comment likely refers to the obsolete DLPAR workflow where some
>> resource state transitions were driven more directly from user space
>> utilities, but it also seems to contradict itself: "Change isolate state to
>> Isolate [...]" is at odds with the preceding sentences, and it does not
>> relate at all to the code that follows.
>
> This comment was added by commit 413f7c405a34, a 2006 commit where Mike
> Ellerman moved code from platform/pseries/smp.c into hotplug-cpu.c.
>
> I checked the original code back in smp.c and this comment was added there
> by commit 1da177e4c3f41, which is Linus's initial git commit, where he mentions
> that he didn't bothered with full history (although it is available somewhere,
> allegedly).
>
> This is enough to say that we can't easily see the history behind this comment.
> I also believe that we're better of without it since it doesn't make sense
> with the current codebase.

It was added by the original CPU hotplug commit for ppc64::

https://github.com/mpe/linux-fullhistory/commit/0e9fd9441cd2113b67b14e739267c9e69761489b


The code was fairly similar:

void __cpu_die(unsigned int cpu)
{
	int tries;
	int cpu_status;
	unsigned int pcpu = get_hard_smp_processor_id(cpu);

	for (tries = 0; tries < 5; tries++) {
		cpu_status = query_cpu_stopped(pcpu);

		if (cpu_status == 0)
			break;
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(HZ);
	}
	if (cpu_status != 0) {
		printk("Querying DEAD? cpu %i (%i) shows %i\n",
		       cpu, pcpu, cpu_status);
	}

	/* Isolation and deallocation are definatly done by
	 * drslot_chrp_cpu.  If they were not they would be
	 * done here.  Change isolate state to Isolate and
	 * change allocation-state to Unusable.
	 */
	paca[cpu].xProcStart = 0;

	/* So we can recognize if it fails to come up next time. */
	cpu_callin_map[cpu] = 0;
}


drslot_chrp_cpu() still exists in drmgr:

  https://github.com/ibm-power-utilities/powerpc-utils/blob/e798c4a09fbf0fa0f421e624cfa366a6c405c9fe/src/drmgr/drslot_chrp_cpu.c#L406


I agree the comment is no longer meaningful and can be removed.

It might be good to then add a comment explaining why we need to set
cpu_start = 0.

It's not immediately clear why we need to. When we bring a CPU back
online in smp_pSeries_kick_cpu() we ask RTAS to start it and then
immediately set cpu_start = 1, ie. there isn't a separate step that sets
cpu_start = 1 for hotplugged CPUs.

cheers

^ permalink raw reply

* [PATCH v2 2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
From: Xiaoming Ni @ 2021-09-29  3:36 UTC (permalink / raw)
  To: oss, mpe, benh, paulus, paul.gortmaker, chenhui.zhao,
	Yuantian.Tang, linuxppc-dev, linux-kernel, stable, gregkh
  Cc: liuwenliang, wangle6, nixiaoming, chenjianguo3
In-Reply-To: <20210929033646.39630-1-nixiaoming@huawei.com>

When CONFIG_SMP=y, timebase synchronization is required when the second
 kernel is started.
	arch/powerpc/kernel/smp.c:
	int __cpu_up(unsigned int cpu, struct task_struct *tidle)
	{
		...
		if (smp_ops->give_timebase)
			smp_ops->give_timebase();
		...
	}

	void start_secondary(void *unused)
	{
		...
		if (smp_ops->take_timebase)
			smp_ops->take_timebase();
		...
	}

When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n,
 smp_85xx_ops.give_timebase is NULL,
 smp_85xx_ops.take_timebase is NULL,
As a result, the timebase is not synchronized.

Timebase  synchronization does not depend on CONFIG_HOTPLUG_CPU.

Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations")
Cc: stable@vger.kernel.org #v4.6
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
---
 arch/powerpc/platforms/85xx/Makefile         |  4 +++-
 arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c |  4 ++++
 arch/powerpc/platforms/85xx/smp.c            | 12 ++++++------
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index 60e4e97a929d..260fbad7967b 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -3,7 +3,9 @@
 # Makefile for the PowerPC 85xx linux kernel.
 #
 obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_FSL_PMC)		  += mpc85xx_pm_ops.o
+ifneq ($(CONFIG_FSL_CORENET_RCPM),y)
+obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o
+endif
 
 obj-y += common.o
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
index ffa8a7a6a2db..4a8af80011a6 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
@@ -17,6 +17,7 @@
 
 static struct ccsr_guts __iomem *guts;
 
+#ifdef CONFIG_FSL_PMC
 static void mpc85xx_irq_mask(int cpu)
 {
 
@@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu)
 {
 
 }
+#endif
 
 static void mpc85xx_freeze_time_base(bool freeze)
 {
@@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = {
 
 static const struct fsl_pm_ops mpc85xx_pm_ops = {
 	.freeze_time_base = mpc85xx_freeze_time_base,
+#ifdef CONFIG_FSL_PMC
 	.irq_mask = mpc85xx_irq_mask,
 	.irq_unmask = mpc85xx_irq_unmask,
 	.cpu_die = mpc85xx_cpu_die,
 	.cpu_up_prepare = mpc85xx_cpu_up_prepare,
+#endif
 };
 
 int __init mpc85xx_setup_pmc(void)
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index c6df294054fe..83f4a6389a28 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -40,7 +40,6 @@ struct epapr_spin_table {
 	u32	pir;
 };
 
-#ifdef CONFIG_HOTPLUG_CPU
 static u64 timebase;
 static int tb_req;
 static int tb_valid;
@@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void)
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 static void smp_85xx_cpu_offline_self(void)
 {
 	unsigned int cpu = smp_processor_id();
@@ -495,21 +495,21 @@ void __init mpc85xx_smp_init(void)
 		smp_85xx_ops.probe = NULL;
 	}
 
-#ifdef CONFIG_HOTPLUG_CPU
 #ifdef CONFIG_FSL_CORENET_RCPM
+	/* Assign a value to qoriq_pm_ops on PPC_E500MC */
 	fsl_rcpm_init();
-#endif
-
-#ifdef CONFIG_FSL_PMC
+#else
+	/* Assign a value to qoriq_pm_ops on !PPC_E500MC */
 	mpc85xx_setup_pmc();
 #endif
 	if (qoriq_pm_ops) {
 		smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
 		smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
+#ifdef CONFIG_HOTPLUG_CPU
 		smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
 		smp_85xx_ops.cpu_die = qoriq_cpu_kill;
-	}
 #endif
+	}
 	smp_ops = &smp_85xx_ops;
 
 #ifdef CONFIG_KEXEC_CORE
-- 
2.27.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox