* [PATCH v2 1/8] bpf powerpc: Remove unused SEEN_STACK
From: Hari Bathini @ 2021-09-17 15:30 UTC (permalink / raw)
To: naveen.n.rao, mpe, ast, daniel
Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210917153047.177141-1-hbathini@linux.ibm.com>
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
SEEN_STACK is unused on PowerPC. Remove it. Also, have
SEEN_TAILCALL use 0x40000000.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
* No changes in v2.
arch/powerpc/net/bpf_jit.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 99fad093f43e..d6267e93027a 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -116,8 +116,7 @@ static inline bool is_nearbranch(int offset)
#define COND_LE (CR0_GT | COND_CMP_FALSE)
#define SEEN_FUNC 0x20000000 /* might call external helpers */
-#define SEEN_STACK 0x40000000 /* uses BPF stack */
-#define SEEN_TAILCALL 0x80000000 /* uses tail calls */
+#define SEEN_TAILCALL 0x40000000 /* uses tail calls */
#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */
--
2.31.1
^ permalink raw reply related
* [PATCH v2 2/8] bpf powerpc: Remove extra_pass from bpf_jit_build_body()
From: Hari Bathini @ 2021-09-17 15:30 UTC (permalink / raw)
To: naveen.n.rao, mpe, ast, daniel
Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210917153047.177141-1-hbathini@linux.ibm.com>
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
In case of extra_pass, usual JIT passes are always skipped. So,
extra_pass is always false while calling bpf_jit_build_body() and
thus it can be removed.
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
Changes in v2:
* Updated the changelog wording a bit.
arch/powerpc/net/bpf_jit.h | 2 +-
arch/powerpc/net/bpf_jit_comp.c | 6 +++---
arch/powerpc/net/bpf_jit_comp32.c | 4 ++--
arch/powerpc/net/bpf_jit_comp64.c | 4 ++--
4 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d6267e93027a..411c63d945c7 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -166,7 +166,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
- u32 *addrs, bool extra_pass);
+ u32 *addrs);
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
void bpf_jit_realloc_regs(struct codegen_context *ctx);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 53aefee3fe70..c5c9e8ad1de7 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -149,7 +149,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
/* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
/* We hit something illegal or unsupported. */
fp = org_fp;
goto out_addrs;
@@ -162,7 +162,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
*/
if (cgctx.seen & SEEN_TAILCALL) {
cgctx.idx = 0;
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
fp = org_fp;
goto out_addrs;
}
@@ -210,7 +210,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
+ bpf_jit_build_body(fp, code_base, &cgctx, addrs);
bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1)
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index beb12cbc8c29..b60b59426a24 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -266,7 +266,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* Assemble the body code between the prologue & epilogue */
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
- u32 *addrs, bool extra_pass)
+ u32 *addrs)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
@@ -860,7 +860,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
- ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+ ret = bpf_jit_get_func_addr(fp, &insn[i], false,
&func_addr, &func_addr_fixed);
if (ret < 0)
return ret;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index b87a63dba9c8..2a87da50d9a4 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -272,7 +272,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* Assemble the body code between the prologue & epilogue */
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
- u32 *addrs, bool extra_pass)
+ u32 *addrs)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
@@ -769,7 +769,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
- ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+ ret = bpf_jit_get_func_addr(fp, &insn[i], false,
&func_addr, &func_addr_fixed);
if (ret < 0)
return ret;
--
2.31.1
^ permalink raw reply related
* [PATCH v2 4/8] powerpc/ppc-opcode: introduce PPC_RAW_BRANCH() macro
From: Hari Bathini @ 2021-09-17 15:30 UTC (permalink / raw)
To: naveen.n.rao, mpe, ast, daniel
Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210917153047.177141-1-hbathini@linux.ibm.com>
Define and use PPC_RAW_BRANCH() macro instead of open coding it. This
macro is used while adding BPF_PROBE_MEM support.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
Changes in v2:
* New patch to introduce PPC_RAW_BRANCH() macro.
arch/powerpc/include/asm/ppc-opcode.h | 2 ++
arch/powerpc/net/bpf_jit.h | 4 ++--
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index baea657bc868..f50213e2a3e0 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -566,6 +566,8 @@
#define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr))
#define PPC_RAW_EIEIO() (0x7c0006ac)
+#define PPC_RAW_BRANCH(addr) (PPC_INST_BRANCH | ((addr) & 0x03fffffc))
+
/* Deal with instructions that older assemblers aren't aware of */
#define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
#define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 411c63d945c7..0c8f885b8f48 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -24,8 +24,8 @@
#define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr)
/* Long jump; (unconditional 'branch') */
-#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \
- (((dest) - (ctx->idx * 4)) & 0x03fffffc))
+#define PPC_JMP(dest) EMIT(PPC_RAW_BRANCH((dest) - (ctx->idx * 4)))
+
/* blr; (unconditional 'branch' with link) to absolute address */
#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \
(((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc))
--
2.31.1
^ permalink raw reply related
* [PATCH v2 5/8] bpf ppc64: Add BPF_PROBE_MEM support for JIT
From: Hari Bathini @ 2021-09-17 15:30 UTC (permalink / raw)
To: naveen.n.rao, mpe, ast, daniel
Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210917153047.177141-1-hbathini@linux.ibm.com>
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
BPF load instruction with BPF_PROBE_MEM mode can cause a fault
inside kernel. Append exception table for such instructions
within BPF program.
Unlike other archs which uses extable 'fixup' field to pass dest_reg
and nip, BPF exception table on PowerPC follows the generic PowerPC
exception table design, where it populates both fixup and extable
sections within BPF program. fixup section contains two instructions,
first instruction clears dest_reg and 2nd jumps to next instruction
in the BPF code. extable 'insn' field contains relative offset of
the instruction and 'fixup' field contains relative offset of the
fixup entry. Example layout of BPF program with extable present:
+------------------+
| |
| |
0x4020 -->| ld r27,4(r3) |
| |
| |
0x40ac -->| lwz r3,0(r4) |
| |
| |
|------------------|
0x4280 -->| li r27,0 | \ fixup entry
| b 0x4024 | /
0x4288 -->| li r3,0 |
| b 0x40b0 |
|------------------|
0x4290 -->| insn=0xfffffd90 | \ extable entry
| fixup=0xffffffec | /
0x4298 -->| insn=0xfffffe14 |
| fixup=0xffffffec |
+------------------+
(Addresses shown here are chosen random, not real)
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
Changes in v2:
* Used JITing code after refactoring.
* Replaced 'xor reg,reg,reg' with 'li reg,0' where appropriate.
* Avoided unnecessary init during declaration.
arch/powerpc/net/bpf_jit.h | 5 ++-
arch/powerpc/net/bpf_jit_comp.c | 25 ++++++++++----
arch/powerpc/net/bpf_jit_comp32.c | 2 +-
arch/powerpc/net/bpf_jit_comp64.c | 57 ++++++++++++++++++++++++++++++-
4 files changed, 80 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 0c8f885b8f48..6357c71c26eb 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -141,8 +141,11 @@ struct codegen_context {
unsigned int idx;
unsigned int stack_size;
int b2p[ARRAY_SIZE(b2p)];
+ unsigned int exentry_idx;
};
+#define BPF_FIXUP_LEN 8 /* Two instructions */
+
static inline void bpf_flush_icache(void *start, void *end)
{
smp_wmb(); /* smp write barrier */
@@ -166,7 +169,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
- u32 *addrs);
+ u32 *addrs, int pass);
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
void bpf_jit_realloc_regs(struct codegen_context *ctx);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index c5c9e8ad1de7..e92bd79d3bac 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *tmp_fp;
bool bpf_blinded = false;
bool extra_pass = false;
+ u32 extable_len;
+ u32 fixup_len;
if (!fp->jit_requested)
return org_fp;
@@ -131,7 +133,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
image = jit_data->image;
bpf_hdr = jit_data->header;
proglen = jit_data->proglen;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
extra_pass = true;
goto skip_init_ctx;
}
@@ -149,7 +150,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
/* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
/* We hit something illegal or unsupported. */
fp = org_fp;
goto out_addrs;
@@ -162,7 +163,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
*/
if (cgctx.seen & SEEN_TAILCALL) {
cgctx.idx = 0;
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
fp = org_fp;
goto out_addrs;
}
@@ -177,8 +178,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_build_prologue(0, &cgctx);
bpf_jit_build_epilogue(0, &cgctx);
+ fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN;
+ extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
+
proglen = cgctx.idx * 4;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
+ alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
if (!bpf_hdr) {
@@ -186,6 +190,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
goto out_addrs;
}
+ if (extable_len) {
+ fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE +
+ proglen + fixup_len;
+ }
+
skip_init_ctx:
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
@@ -210,7 +219,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+ if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) {
+ bpf_jit_binary_free(bpf_hdr);
+ fp = org_fp;
+ goto out_addrs;
+ }
bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1)
@@ -234,7 +247,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp->bpf_func = (void *)image;
fp->jited = 1;
- fp->jited_len = alloclen;
+ fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
bpf_jit_binary_lock_ro(bpf_hdr);
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index c8ae14c316e3..94641b7be387 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -266,7 +266,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* Assemble the body code between the prologue & epilogue */
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
- u32 *addrs)
+ u32 *addrs, int pass)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 78b28f25555c..2fc10995f243 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -270,9 +270,54 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* out: */
}
+/*
+ * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
+ * this function, as this only applies to BPF_PROBE_MEM, for now.
+ */
+static int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass,
+ struct codegen_context *ctx, int dst_reg)
+{
+ off_t offset;
+ unsigned long pc;
+ struct exception_table_entry *ex;
+ u32 *fixup;
+
+ /* Populate extable entries only in the last pass */
+ if (pass != 2)
+ return 0;
+
+ if (!fp->aux->extable ||
+ WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
+ return -EINVAL;
+
+ pc = (unsigned long)&image[ctx->idx - 1];
+
+ fixup = (void *)fp->aux->extable -
+ (fp->aux->num_exentries * BPF_FIXUP_LEN) +
+ (ctx->exentry_idx * BPF_FIXUP_LEN);
+
+ fixup[0] = PPC_RAW_LI(dst_reg, 0);
+ fixup[1] = PPC_RAW_BRANCH((long)(pc + 4) - (long)&fixup[1]);
+
+ ex = &fp->aux->extable[ctx->exentry_idx];
+
+ offset = pc - (long)&ex->insn;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex->insn = offset;
+
+ offset = (long)fixup - (long)&ex->fixup;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex->fixup = offset;
+
+ ctx->exentry_idx++;
+ return 0;
+}
+
/* Assemble the body code between the prologue & epilogue */
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
- u32 *addrs)
+ u32 *addrs, int pass)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
@@ -714,12 +759,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
*/
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
switch (size) {
case BPF_B:
EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
@@ -737,6 +786,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
if ((size != BPF_DW) && insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
+
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ ret = bpf_add_extable_entry(fp, image, pass, ctx, dst_reg);
+ if (ret)
+ return ret;
+ }
break;
/*
--
2.31.1
^ permalink raw reply related
* [PATCH v2 6/8] bpf ppc64: Add addr > TASK_SIZE_MAX explicit check
From: Hari Bathini @ 2021-09-17 15:30 UTC (permalink / raw)
To: naveen.n.rao, mpe, ast, daniel
Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210917153047.177141-1-hbathini@linux.ibm.com>
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
On PPC64 with KUAP enabled, any kernel code which wants to
access userspace needs to be surrounded by disable-enable KUAP.
But that is not happening for BPF_PROBE_MEM load instruction.
So, when BPF program tries to access invalid userspace address,
page-fault handler considers it as bad KUAP fault:
Kernel attempted to read user page (d0000000) - exploit attempt? (uid: 0)
Considering the fact that PTR_TO_BTF_ID (which uses BPF_PROBE_MEM
mode) could either be a valid kernel pointer or NULL but should
never be a pointer to userspace address, execute BPF_PROBE_MEM load
only if addr > TASK_SIZE_MAX, otherwise set dst_reg=0 and move on.
This will catch NULL, valid or invalid userspace pointers. Only bad
kernel pointer will be handled by BPF exception table.
[Alexei suggested for x86]
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
Changes in v2:
* Refactored the code based on Christophe's comments.
arch/powerpc/net/bpf_jit_comp64.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 2fc10995f243..eb28dbc67151 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -769,6 +769,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ /*
+ * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
+ * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
+ * load only if addr > TASK_SIZE_MAX, otherwise set dst_reg=0 and move on.
+ */
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ unsigned int adjusted_idx;
+
+ /*
+ * Check if 'off' is word aligned because PPC_BPF_LL()
+ * (BPF_DW case) generates two instructions if 'off' is not
+ * word-aligned and one instruction otherwise.
+ */
+ adjusted_idx = ((BPF_SIZE(code) == BPF_DW) && (off & 3)) ? 1 : 0;
+
+ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off));
+ PPC_LI64(b2p[TMP_REG_2], TASK_SIZE_MAX);
+ EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2]));
+ PPC_BCC(COND_GT, (ctx->idx + 4) * 4);
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ PPC_JMP((ctx->idx + 2 + adjusted_idx) * 4);
+ }
+
switch (size) {
case BPF_B:
EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
--
2.31.1
^ permalink raw reply related
* [PATCH v2 3/8] bpf powerpc: refactor JIT compiler code
From: Hari Bathini @ 2021-09-17 15:30 UTC (permalink / raw)
To: naveen.n.rao, mpe, ast, daniel
Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210917153047.177141-1-hbathini@linux.ibm.com>
Refactor powerpc JITing. This simplifies adding BPF_PROBE_MEM support.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
Changes in v2:
* New patch to refactor a bit of JITing code.
arch/powerpc/net/bpf_jit_comp32.c | 50 +++++++++++---------
arch/powerpc/net/bpf_jit_comp64.c | 76 ++++++++++++++++---------------
2 files changed, 68 insertions(+), 58 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index b60b59426a24..c8ae14c316e3 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -276,17 +276,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
u32 exit_addr = addrs[flen];
for (i = 0; i < flen; i++) {
- u32 code = insn[i].code;
u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg);
- u32 dst_reg_h = dst_reg - 1;
u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
- u32 src_reg_h = src_reg - 1;
u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
+ u32 true_cond, code = insn[i].code;
+ u32 dst_reg_h = dst_reg - 1;
+ u32 src_reg_h = src_reg - 1;
+ u32 size = BPF_SIZE(code);
s16 off = insn[i].off;
s32 imm = insn[i].imm;
bool func_addr_fixed;
u64 func_addr;
- u32 true_cond;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -809,25 +809,33 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
/*
* BPF_LDX
*/
- case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
- EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
- if (!fp->aux->verifier_zext)
- EMIT(PPC_RAW_LI(dst_reg_h, 0));
- break;
- case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
- EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
- if (!fp->aux->verifier_zext)
- EMIT(PPC_RAW_LI(dst_reg_h, 0));
- break;
- case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
- if (!fp->aux->verifier_zext)
+ /* dst = *(u8 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEM | BPF_B:
+ /* dst = *(u16 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEM | BPF_H:
+ /* dst = *(u32 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEM | BPF_W:
+ /* dst = *(u64 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ case BPF_DW:
+ EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+ break;
+ }
+
+ if ((size != BPF_DW) && !fp->aux->verifier_zext)
EMIT(PPC_RAW_LI(dst_reg_h, 0));
break;
- case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
- EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
- break;
/*
* Doubleword load
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 2a87da50d9a4..78b28f25555c 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -282,16 +282,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
u32 exit_addr = addrs[flen];
for (i = 0; i < flen; i++) {
- u32 code = insn[i].code;
u32 dst_reg = b2p[insn[i].dst_reg];
u32 src_reg = b2p[insn[i].src_reg];
+ u32 tmp_idx, code = insn[i].code;
+ u32 size = BPF_SIZE(code);
s16 off = insn[i].off;
s32 imm = insn[i].imm;
bool func_addr_fixed;
- u64 func_addr;
- u64 imm64;
+ u64 func_addr, imm64;
u32 true_cond;
- u32 tmp_idx;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -638,35 +637,34 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
*/
case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
- if (BPF_CLASS(code) == BPF_ST) {
- EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
- src_reg = b2p[TMP_REG_1];
- }
- EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
- break;
case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
- if (BPF_CLASS(code) == BPF_ST) {
- EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
- src_reg = b2p[TMP_REG_1];
- }
- EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
- break;
case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
- if (BPF_CLASS(code) == BPF_ST) {
- PPC_LI32(b2p[TMP_REG_1], imm);
- src_reg = b2p[TMP_REG_1];
- }
- EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
- break;
case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
if (BPF_CLASS(code) == BPF_ST) {
- PPC_LI32(b2p[TMP_REG_1], imm);
+ if ((size == BPF_B) || (size == BPF_H))
+ EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
+ else /* size == BPF_W || size == BPF_DW */
+ PPC_LI32(b2p[TMP_REG_1], imm);
src_reg = b2p[TMP_REG_1];
}
- PPC_BPF_STL(src_reg, dst_reg, off);
+
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
+ break;
+ case BPF_DW:
+ PPC_BPF_STL(src_reg, dst_reg, off);
+ break;
+ }
break;
/*
@@ -716,25 +714,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
*/
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
- if (insn_is_zext(&insn[i + 1]))
- addrs[++i] = ctx->idx * 4;
- break;
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
- EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
- if (insn_is_zext(&insn[i + 1]))
- addrs[++i] = ctx->idx * 4;
- break;
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
- if (insn_is_zext(&insn[i + 1]))
- addrs[++i] = ctx->idx * 4;
- break;
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
- PPC_BPF_LL(dst_reg, src_reg, off);
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ case BPF_DW:
+ PPC_BPF_LL(dst_reg, src_reg, off);
+ break;
+ }
+
+ if ((size != BPF_DW) && insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
/*
--
2.31.1
^ permalink raw reply related
* [PATCH v2 7/8] bpf ppc32: Add BPF_PROBE_MEM support for JIT
From: Hari Bathini @ 2021-09-17 15:30 UTC (permalink / raw)
To: naveen.n.rao, mpe, ast, daniel
Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210917153047.177141-1-hbathini@linux.ibm.com>
BPF load instruction with BPF_PROBE_MEM mode can cause a fault
inside kernel. Append exception table for such instructions
within BPF program.
Unlike other archs which uses extable 'fixup' field to pass dest_reg
and nip, BPF exception table on PowerPC follows the generic PowerPC
exception table design, where it populates both fixup and extable
sections within BPF program. fixup section contains 3 instructions,
first 2 instructions clear dest_reg (lower & higher 32-bit registers)
and last instruction jumps to next instruction in the BPF code.
extable 'insn' field contains relative offset of the instruction and
'fixup' field contains relative offset of the fixup entry. Example
layout of BPF program with extable present:
+------------------+
| |
| |
0x4020 -->| lwz r28,4(r4) |
| |
| |
0x40ac -->| lwz r3,0(r24) |
| lwz r4,4(r24) |
| |
| |
|------------------|
0x4278 -->| li r28,0 | \
| li r27,0 | | fixup entry
| b 0x4024 | /
0x4284 -->| li r4,0 |
| li r3,0 |
| b 0x40b4 |
|------------------|
0x4290 -->| insn=0xfffffd90 | \ extable entry
| fixup=0xffffffe4 | /
0x4298 -->| insn=0xfffffe14 |
| fixup=0xffffffe8 |
+------------------+
(Addresses shown here are chosen random, not real)
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
Changes in v2:
* New patch to add BPF_PROBE_MEM support for PPC32.
arch/powerpc/net/bpf_jit.h | 7 +++++
arch/powerpc/net/bpf_jit_comp.c | 50 +++++++++++++++++++++++++++++++
arch/powerpc/net/bpf_jit_comp32.c | 30 +++++++++++++++++++
arch/powerpc/net/bpf_jit_comp64.c | 48 ++---------------------------
4 files changed, 89 insertions(+), 46 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 6357c71c26eb..6a591ef88006 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -144,7 +144,11 @@ struct codegen_context {
unsigned int exentry_idx;
};
+#ifdef CONFIG_PPC32
+#define BPF_FIXUP_LEN 12 /* Three instructions */
+#else
#define BPF_FIXUP_LEN 8 /* Two instructions */
+#endif
static inline void bpf_flush_icache(void *start, void *end)
{
@@ -174,6 +178,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
void bpf_jit_realloc_regs(struct codegen_context *ctx);
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
+ int insn_idx, int jmp_off, int dst_reg);
+
#endif
#endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index e92bd79d3bac..a1753b8c78c8 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -271,3 +271,53 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
return fp;
}
+
+/*
+ * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
+ * this function, as this only applies to BPF_PROBE_MEM, for now.
+ */
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
+ int insn_idx, int jmp_off, int dst_reg)
+{
+ off_t offset;
+ unsigned long pc;
+ struct exception_table_entry *ex;
+ u32 *fixup;
+
+ /* Populate extable entries only in the last pass */
+ if (pass != 2)
+ return 0;
+
+ if (!fp->aux->extable ||
+ WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
+ return -EINVAL;
+
+ pc = (unsigned long)&image[insn_idx];
+
+ fixup = (void *)fp->aux->extable -
+ (fp->aux->num_exentries * BPF_FIXUP_LEN) +
+ (ctx->exentry_idx * BPF_FIXUP_LEN);
+
+ fixup[0] = PPC_RAW_LI(dst_reg, 0);
+#ifdef CONFIG_PPC32
+ fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */
+ fixup[2] = PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[2]);
+#else
+ fixup[1] = PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[1]);
+#endif
+
+ ex = &fp->aux->extable[ctx->exentry_idx];
+
+ offset = pc - (long)&ex->insn;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex->insn = offset;
+
+ offset = (long)fixup - (long)&ex->fixup;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex->fixup = offset;
+
+ ctx->exentry_idx++;
+ return 0;
+}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 94641b7be387..c6262289dcc4 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -811,12 +811,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
*/
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
switch (size) {
case BPF_B:
EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
@@ -835,6 +839,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
if ((size != BPF_DW) && !fp->aux->verifier_zext)
EMIT(PPC_RAW_LI(dst_reg_h, 0));
+
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ int insn_idx = ctx->idx - 1;
+ int jmp_off = 4;
+
+ /*
+ * In case of BPF_DW, two lwz instructions are emitted, one
+ * for higher 32-bit and another for lower 32-bit. So, set
+ * ex->insn to the first of the two and jump over both
+ * instructions in fixup.
+ *
+ * Similarly, with !verifier_zext, two instructions are
+ * emitted for BPF_B/H/W case. So, set ex-insn to the
+ * instruction that could fault and skip over both
+ * instructions.
+ */
+ if ((size == BPF_DW) || !fp->aux->verifier_zext) {
+ insn_idx -= 1;
+ jmp_off += 4;
+ }
+
+ ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
+ jmp_off, dst_reg);
+ if (ret)
+ return ret;
+ }
break;
/*
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index eb28dbc67151..10cc9f04843c 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -270,51 +270,6 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* out: */
}
-/*
- * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
- * this function, as this only applies to BPF_PROBE_MEM, for now.
- */
-static int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass,
- struct codegen_context *ctx, int dst_reg)
-{
- off_t offset;
- unsigned long pc;
- struct exception_table_entry *ex;
- u32 *fixup;
-
- /* Populate extable entries only in the last pass */
- if (pass != 2)
- return 0;
-
- if (!fp->aux->extable ||
- WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
- return -EINVAL;
-
- pc = (unsigned long)&image[ctx->idx - 1];
-
- fixup = (void *)fp->aux->extable -
- (fp->aux->num_exentries * BPF_FIXUP_LEN) +
- (ctx->exentry_idx * BPF_FIXUP_LEN);
-
- fixup[0] = PPC_RAW_LI(dst_reg, 0);
- fixup[1] = PPC_RAW_BRANCH((long)(pc + 4) - (long)&fixup[1]);
-
- ex = &fp->aux->extable[ctx->exentry_idx];
-
- offset = pc - (long)&ex->insn;
- if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
- return -ERANGE;
- ex->insn = offset;
-
- offset = (long)fixup - (long)&ex->fixup;
- if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
- return -ERANGE;
- ex->fixup = offset;
-
- ctx->exentry_idx++;
- return 0;
-}
-
/* Assemble the body code between the prologue & epilogue */
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
u32 *addrs, int pass)
@@ -811,7 +766,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
addrs[++i] = ctx->idx * 4;
if (BPF_MODE(code) == BPF_PROBE_MEM) {
- ret = bpf_add_extable_entry(fp, image, pass, ctx, dst_reg);
+ ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
+ 4, dst_reg);
if (ret)
return ret;
}
--
2.31.1
^ permalink raw reply related
* [PATCH v2 8/8] bpf ppc32: Add addr > TASK_SIZE_MAX explicit check
From: Hari Bathini @ 2021-09-17 15:30 UTC (permalink / raw)
To: naveen.n.rao, mpe, ast, daniel
Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
yhs, bpf, linuxppc-dev, kafai, Hari Bathini
In-Reply-To: <20210917153047.177141-1-hbathini@linux.ibm.com>
With KUAP enabled, any kernel code which wants to access userspace
needs to be surrounded by disable-enable KUAP. But that is not
happening for BPF_PROBE_MEM load instruction. Though PPC32 does not
support read protection, considering the fact that PTR_TO_BTF_ID
(which uses BPF_PROBE_MEM mode) could either be a valid kernel pointer
or NULL but should never be a pointer to userspace address, execute
BPF_PROBE_MEM load only if addr > TASK_SIZE_MAX, otherwise set
dst_reg=0 and move on.
This will catch NULL, valid or invalid userspace pointers. Only bad
kernel pointer will be handled by BPF exception table.
[Alexei suggested for x86]
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
Changes in v2:
* New patch to handle bad userspace pointers on PPC32.
arch/powerpc/net/bpf_jit_comp32.c | 39 +++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index c6262289dcc4..faa8047fcf4a 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -821,6 +821,45 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ /*
+ * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
+ * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
+ * load only if addr > TASK_SIZE_MAX, otherwise set dst_reg=0 and move on.
+ */
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ bool extra_insn_needed = false;
+ unsigned int adjusted_idx;
+
+ /*
+ * For BPF_DW case, "li reg_h,0" would be needed when
+ * !fp->aux->verifier_zext. Adjust conditional branch'ing
+ * address accordingly.
+ */
+ if ((size == BPF_DW) && !fp->aux->verifier_zext)
+ extra_insn_needed = true;
+
+ /*
+ * Need to jump two instructions instead of one for BPF_DW case
+ * as there are two load instructions for dst_reg_h & dst_reg
+ * respectively.
+ */
+ adjusted_idx = (size == BPF_DW) ? 1 : 0;
+
+ EMIT(PPC_RAW_ADDI(b2p[TMP_REG], src_reg, off));
+ PPC_LI32(_R0, TASK_SIZE_MAX);
+ EMIT(PPC_RAW_CMPLW(b2p[TMP_REG], _R0));
+ PPC_BCC(COND_GT, (ctx->idx + 4 + (extra_insn_needed ? 1 : 0)) * 4);
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ /*
+ * Note that "li reg_h,0" is emitted for BPF_B/H/W case,
+ * if necessary. So, jump there insted of emitting an
+ * additional "li reg_h,0" instruction.
+ */
+ if (extra_insn_needed)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ PPC_JMP((ctx->idx + 2 + adjusted_idx) * 4);
+ }
+
switch (size) {
case BPF_B:
EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
--
2.31.1
^ permalink raw reply related
* Re: [PATCH v2 1/8] bpf powerpc: Remove unused SEEN_STACK
From: Christophe Leroy @ 2021-09-17 16:02 UTC (permalink / raw)
To: Hari Bathini, naveen.n.rao, mpe, ast, daniel
Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210917153047.177141-2-hbathini@linux.ibm.com>
Le 17/09/2021 à 17:30, Hari Bathini a écrit :
> From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
>
> SEEN_STACK is unused on PowerPC. Remove it. Also, have
> SEEN_TAILCALL use 0x40000000.
>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>
> * No changes in v2.
>
>
> arch/powerpc/net/bpf_jit.h | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 99fad093f43e..d6267e93027a 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -116,8 +116,7 @@ static inline bool is_nearbranch(int offset)
> #define COND_LE (CR0_GT | COND_CMP_FALSE)
>
> #define SEEN_FUNC 0x20000000 /* might call external helpers */
> -#define SEEN_STACK 0x40000000 /* uses BPF stack */
> -#define SEEN_TAILCALL 0x80000000 /* uses tail calls */
> +#define SEEN_TAILCALL 0x40000000 /* uses tail calls */
>
> #define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
> #define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */
>
^ permalink raw reply
* Re: [PATCH v2 3/8] bpf powerpc: refactor JIT compiler code
From: Christophe Leroy @ 2021-09-17 16:10 UTC (permalink / raw)
To: Hari Bathini, naveen.n.rao, mpe, ast, daniel
Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210917153047.177141-4-hbathini@linux.ibm.com>
Le 17/09/2021 à 17:30, Hari Bathini a écrit :
> Refactor powerpc JITing. This simplifies adding BPF_PROBE_MEM support.
Could you describe a bit more what you are refactoring exactly ?
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>
> Changes in v2:
> * New patch to refactor a bit of JITing code.
>
>
> arch/powerpc/net/bpf_jit_comp32.c | 50 +++++++++++---------
> arch/powerpc/net/bpf_jit_comp64.c | 76 ++++++++++++++++---------------
> 2 files changed, 68 insertions(+), 58 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index b60b59426a24..c8ae14c316e3 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -276,17 +276,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> u32 exit_addr = addrs[flen];
>
> for (i = 0; i < flen; i++) {
> - u32 code = insn[i].code;
> u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg);
> - u32 dst_reg_h = dst_reg - 1;
> u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
> - u32 src_reg_h = src_reg - 1;
> u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
> + u32 true_cond, code = insn[i].code;
> + u32 dst_reg_h = dst_reg - 1;
> + u32 src_reg_h = src_reg - 1;
All changes above seems unneeded and not linked to the current patch.
Please leave cosmetic changes outside and focus on necessary changes.
> + u32 size = BPF_SIZE(code);
> s16 off = insn[i].off;
> s32 imm = insn[i].imm;
> bool func_addr_fixed;
> u64 func_addr;
> - u32 true_cond;
>
> /*
> * addrs[] maps a BPF bytecode address into a real offset from
> @@ -809,25 +809,33 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> /*
> * BPF_LDX
> */
> - case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
> - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> - if (!fp->aux->verifier_zext)
> - EMIT(PPC_RAW_LI(dst_reg_h, 0));
> - break;
> - case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
> - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> - if (!fp->aux->verifier_zext)
> - EMIT(PPC_RAW_LI(dst_reg_h, 0));
> - break;
> - case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
> - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> - if (!fp->aux->verifier_zext)
> + /* dst = *(u8 *)(ul) (src + off) */
> + case BPF_LDX | BPF_MEM | BPF_B:
> + /* dst = *(u16 *)(ul) (src + off) */
> + case BPF_LDX | BPF_MEM | BPF_H:
> + /* dst = *(u32 *)(ul) (src + off) */
> + case BPF_LDX | BPF_MEM | BPF_W:
> + /* dst = *(u64 *)(ul) (src + off) */
> + case BPF_LDX | BPF_MEM | BPF_DW:
Why changing the location of the comments ? I found it more readable before.
> + switch (size) {
> + case BPF_B:
> + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> + break;
> + case BPF_H:
> + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> + break;
> + case BPF_W:
> + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> + break;
> + case BPF_DW:
> + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
> + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
> + break;
> + }
BPF_B, BPF_H, ... are not part of an enum. Are you sure GCC is happy to
have no default ?
> +
> + if ((size != BPF_DW) && !fp->aux->verifier_zext)
You don't need () around size != BPF_DW
> EMIT(PPC_RAW_LI(dst_reg_h, 0));
> break;
> - case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
> - EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
> - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
> - break;
>
> /*
> * Doubleword load
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 2a87da50d9a4..78b28f25555c 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -282,16 +282,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> u32 exit_addr = addrs[flen];
>
> for (i = 0; i < flen; i++) {
> - u32 code = insn[i].code;
> u32 dst_reg = b2p[insn[i].dst_reg];
> u32 src_reg = b2p[insn[i].src_reg];
> + u32 tmp_idx, code = insn[i].code;
> + u32 size = BPF_SIZE(code);
> s16 off = insn[i].off;
> s32 imm = insn[i].imm;
> bool func_addr_fixed;
> - u64 func_addr;
> - u64 imm64;
> + u64 func_addr, imm64;
> u32 true_cond;
> - u32 tmp_idx;
All changes other than the addition of 'size' seems unneeded and not
linked to the current patch.
>
> /*
> * addrs[] maps a BPF bytecode address into a real offset from
> @@ -638,35 +637,34 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> */
> case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
> case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
> - if (BPF_CLASS(code) == BPF_ST) {
> - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
> - src_reg = b2p[TMP_REG_1];
> - }
> - EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
> - break;
> case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
> case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
> - if (BPF_CLASS(code) == BPF_ST) {
> - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
> - src_reg = b2p[TMP_REG_1];
> - }
> - EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
> - break;
> case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
> case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
> - if (BPF_CLASS(code) == BPF_ST) {
> - PPC_LI32(b2p[TMP_REG_1], imm);
> - src_reg = b2p[TMP_REG_1];
> - }
> - EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
> - break;
> case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
> case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
> if (BPF_CLASS(code) == BPF_ST) {
> - PPC_LI32(b2p[TMP_REG_1], imm);
> + if ((size == BPF_B) || (size == BPF_H))
> + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
> + else /* size == BPF_W || size == BPF_DW */
> + PPC_LI32(b2p[TMP_REG_1], imm);
I think you can use PPC_LI32() for all cases, it should generate the
same code.
> src_reg = b2p[TMP_REG_1];
> }
> - PPC_BPF_STL(src_reg, dst_reg, off);
> +
> + switch (size) {
> + case BPF_B:
> + EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
> + break;
> + case BPF_H:
> + EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
> + break;
> + case BPF_W:
> + EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
> + break;
> + case BPF_DW:
> + PPC_BPF_STL(src_reg, dst_reg, off);
> + break;
> + }
> break;
>
> /*
> @@ -716,25 +714,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> */
> /* dst = *(u8 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_B:
> - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> - if (insn_is_zext(&insn[i + 1]))
> - addrs[++i] = ctx->idx * 4;
> - break;
> /* dst = *(u16 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_H:
> - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> - if (insn_is_zext(&insn[i + 1]))
> - addrs[++i] = ctx->idx * 4;
> - break;
> /* dst = *(u32 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_W:
> - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> - if (insn_is_zext(&insn[i + 1]))
> - addrs[++i] = ctx->idx * 4;
> - break;
> /* dst = *(u64 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_DW:
> - PPC_BPF_LL(dst_reg, src_reg, off);
> + switch (size) {
> + case BPF_B:
> + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> + break;
> + case BPF_H:
> + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> + break;
> + case BPF_W:
> + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> + break;
> + case BPF_DW:
> + PPC_BPF_LL(dst_reg, src_reg, off);
> + break;
> + }
> +
> + if ((size != BPF_DW) && insn_is_zext(&insn[i + 1]))
> + addrs[++i] = ctx->idx * 4;
> break;
>
> /*
>
^ permalink raw reply
* Re: [PATCH v2 4/8] powerpc/ppc-opcode: introduce PPC_RAW_BRANCH() macro
From: LEROY Christophe @ 2021-09-17 16:14 UTC (permalink / raw)
To: Hari Bathini, naveen.n.rao@linux.ibm.com, mpe@ellerman.id.au,
ast@kernel.org, daniel@iogearbox.net
Cc: songliubraving@fb.com, netdev@vger.kernel.org,
john.fastabend@gmail.com, andrii@kernel.org, kpsingh@kernel.org,
paulus@samba.org, yhs@fb.com, bpf@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org, kafai@fb.com
In-Reply-To: <20210917153047.177141-5-hbathini@linux.ibm.com>
Le 17/09/2021 à 17:30, Hari Bathini a écrit :
> Define and use PPC_RAW_BRANCH() macro instead of open coding it. This
> macro is used while adding BPF_PROBE_MEM support.
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>
> Changes in v2:
> * New patch to introduce PPC_RAW_BRANCH() macro.
>
>
> arch/powerpc/include/asm/ppc-opcode.h | 2 ++
> arch/powerpc/net/bpf_jit.h | 4 ++--
> 2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
> index baea657bc868..f50213e2a3e0 100644
> --- a/arch/powerpc/include/asm/ppc-opcode.h
> +++ b/arch/powerpc/include/asm/ppc-opcode.h
> @@ -566,6 +566,8 @@
> #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr))
> #define PPC_RAW_EIEIO() (0x7c0006ac)
>
> +#define PPC_RAW_BRANCH(addr) (PPC_INST_BRANCH | ((addr) & 0x03fffffc))
> +
> /* Deal with instructions that older assemblers aren't aware of */
> #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
> #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT)
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 411c63d945c7..0c8f885b8f48 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -24,8 +24,8 @@
> #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr)
>
> /* Long jump; (unconditional 'branch') */
> -#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \
> - (((dest) - (ctx->idx * 4)) & 0x03fffffc))
> +#define PPC_JMP(dest) EMIT(PPC_RAW_BRANCH((dest) - (ctx->idx * 4)))
> +
> /* blr; (unconditional 'branch' with link) to absolute address */
> #define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \
> (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc))
>
^ permalink raw reply
* Re: [PATCH v2 5/8] bpf ppc64: Add BPF_PROBE_MEM support for JIT
From: Christophe Leroy @ 2021-09-17 16:20 UTC (permalink / raw)
To: Hari Bathini, naveen.n.rao, mpe, ast, daniel
Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210917153047.177141-6-hbathini@linux.ibm.com>
Le 17/09/2021 à 17:30, Hari Bathini a écrit :
> From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
>
> BPF load instruction with BPF_PROBE_MEM mode can cause a fault
> inside kernel. Append exception table for such instructions
> within BPF program.
>
> Unlike other archs which uses extable 'fixup' field to pass dest_reg
> and nip, BPF exception table on PowerPC follows the generic PowerPC
> exception table design, where it populates both fixup and extable
> sections within BPF program. fixup section contains two instructions,
> first instruction clears dest_reg and 2nd jumps to next instruction
> in the BPF code. extable 'insn' field contains relative offset of
> the instruction and 'fixup' field contains relative offset of the
> fixup entry. Example layout of BPF program with extable present:
>
> +------------------+
> | |
> | |
> 0x4020 -->| ld r27,4(r3) |
> | |
> | |
> 0x40ac -->| lwz r3,0(r4) |
> | |
> | |
> |------------------|
> 0x4280 -->| li r27,0 | \ fixup entry
> | b 0x4024 | /
> 0x4288 -->| li r3,0 |
> | b 0x40b0 |
> |------------------|
> 0x4290 -->| insn=0xfffffd90 | \ extable entry
> | fixup=0xffffffec | /
> 0x4298 -->| insn=0xfffffe14 |
> | fixup=0xffffffec |
> +------------------+
>
> (Addresses shown here are chosen random, not real)
>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>
> Changes in v2:
> * Used JITing code after refactoring.
> * Replaced 'xor reg,reg,reg' with 'li reg,0' where appropriate.
> * Avoided unnecessary init during declaration.
>
>
> arch/powerpc/net/bpf_jit.h | 5 ++-
> arch/powerpc/net/bpf_jit_comp.c | 25 ++++++++++----
> arch/powerpc/net/bpf_jit_comp32.c | 2 +-
> arch/powerpc/net/bpf_jit_comp64.c | 57 ++++++++++++++++++++++++++++++-
> 4 files changed, 80 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 0c8f885b8f48..6357c71c26eb 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -141,8 +141,11 @@ struct codegen_context {
> unsigned int idx;
> unsigned int stack_size;
> int b2p[ARRAY_SIZE(b2p)];
> + unsigned int exentry_idx;
> };
>
> +#define BPF_FIXUP_LEN 8 /* Two instructions */
> +
> static inline void bpf_flush_icache(void *start, void *end)
> {
> smp_wmb(); /* smp write barrier */
> @@ -166,7 +169,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
>
> void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
> int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> - u32 *addrs);
> + u32 *addrs, int pass);
> void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
> void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
> void bpf_jit_realloc_regs(struct codegen_context *ctx);
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index c5c9e8ad1de7..e92bd79d3bac 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> struct bpf_prog *tmp_fp;
> bool bpf_blinded = false;
> bool extra_pass = false;
> + u32 extable_len;
> + u32 fixup_len;
>
> if (!fp->jit_requested)
> return org_fp;
> @@ -131,7 +133,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> image = jit_data->image;
> bpf_hdr = jit_data->header;
> proglen = jit_data->proglen;
> - alloclen = proglen + FUNCTION_DESCR_SIZE;
> extra_pass = true;
> goto skip_init_ctx;
> }
> @@ -149,7 +150,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>
> /* Scouting faux-generate pass 0 */
> - if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
> + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
> /* We hit something illegal or unsupported. */
> fp = org_fp;
> goto out_addrs;
> @@ -162,7 +163,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> */
> if (cgctx.seen & SEEN_TAILCALL) {
> cgctx.idx = 0;
> - if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
> + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
> fp = org_fp;
> goto out_addrs;
> }
> @@ -177,8 +178,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> bpf_jit_build_prologue(0, &cgctx);
> bpf_jit_build_epilogue(0, &cgctx);
>
> + fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN;
> + extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
> +
> proglen = cgctx.idx * 4;
> - alloclen = proglen + FUNCTION_DESCR_SIZE;
> + alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
>
> bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
> if (!bpf_hdr) {
> @@ -186,6 +190,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> goto out_addrs;
> }
>
> + if (extable_len) {
> + fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE +
> + proglen + fixup_len;
> + }
No { } for single lines statements (See kernel coding style)
> +
> skip_init_ctx:
> code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
>
> @@ -210,7 +219,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> /* Now build the prologue, body code & epilogue for real. */
> cgctx.idx = 0;
> bpf_jit_build_prologue(code_base, &cgctx);
> - bpf_jit_build_body(fp, code_base, &cgctx, addrs);
> + if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) {
> + bpf_jit_binary_free(bpf_hdr);
> + fp = org_fp;
> + goto out_addrs;
> + }
> bpf_jit_build_epilogue(code_base, &cgctx);
>
> if (bpf_jit_enable > 1)
> @@ -234,7 +247,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>
> fp->bpf_func = (void *)image;
> fp->jited = 1;
> - fp->jited_len = alloclen;
> + fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
>
> bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
> bpf_jit_binary_lock_ro(bpf_hdr);
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index c8ae14c316e3..94641b7be387 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -266,7 +266,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
>
> /* Assemble the body code between the prologue & epilogue */
> int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> - u32 *addrs)
> + u32 *addrs, int pass)
> {
> const struct bpf_insn *insn = fp->insnsi;
> int flen = fp->len;
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 78b28f25555c..2fc10995f243 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -270,9 +270,54 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
> /* out: */
> }
>
> +/*
> + * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
> + * this function, as this only applies to BPF_PROBE_MEM, for now.
> + */
> +static int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass,
> + struct codegen_context *ctx, int dst_reg)
Patch 7 will reuse this function so put it in
arch/powerpc/net/bpf_jit_comp.c now instead of moving it later.
> +{
> + off_t offset;
> + unsigned long pc;
> + struct exception_table_entry *ex;
> + u32 *fixup;
> +
> + /* Populate extable entries only in the last pass */
> + if (pass != 2)
> + return 0;
> +
> + if (!fp->aux->extable ||
> + WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
> + return -EINVAL;
> +
> + pc = (unsigned long)&image[ctx->idx - 1];
> +
> + fixup = (void *)fp->aux->extable -
> + (fp->aux->num_exentries * BPF_FIXUP_LEN) +
> + (ctx->exentry_idx * BPF_FIXUP_LEN);
> +
> + fixup[0] = PPC_RAW_LI(dst_reg, 0);
> + fixup[1] = PPC_RAW_BRANCH((long)(pc + 4) - (long)&fixup[1]);
> +
> + ex = &fp->aux->extable[ctx->exentry_idx];
> +
> + offset = pc - (long)&ex->insn;
> + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
> + return -ERANGE;
> + ex->insn = offset;
> +
> + offset = (long)fixup - (long)&ex->fixup;
> + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
> + return -ERANGE;
> + ex->fixup = offset;
> +
> + ctx->exentry_idx++;
> + return 0;
> +}
> +
> /* Assemble the body code between the prologue & epilogue */
> int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> - u32 *addrs)
> + u32 *addrs, int pass)
> {
> const struct bpf_insn *insn = fp->insnsi;
> int flen = fp->len;
> @@ -714,12 +759,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> */
> /* dst = *(u8 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_B:
> + case BPF_LDX | BPF_PROBE_MEM | BPF_B:
> /* dst = *(u16 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_H:
> + case BPF_LDX | BPF_PROBE_MEM | BPF_H:
> /* dst = *(u32 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_W:
> + case BPF_LDX | BPF_PROBE_MEM | BPF_W:
> /* dst = *(u64 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_DW:
> + case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
> switch (size) {
> case BPF_B:
> EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> @@ -737,6 +786,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>
> if ((size != BPF_DW) && insn_is_zext(&insn[i + 1]))
> addrs[++i] = ctx->idx * 4;
> +
> + if (BPF_MODE(code) == BPF_PROBE_MEM) {
> + ret = bpf_add_extable_entry(fp, image, pass, ctx, dst_reg);
> + if (ret)
> + return ret;
> + }
> break;
>
> /*
>
^ permalink raw reply
* Re: [PATCH v2 3/8] bpf powerpc: refactor JIT compiler code
From: Christophe Leroy @ 2021-09-17 16:22 UTC (permalink / raw)
To: Hari Bathini, naveen.n.rao, mpe, ast, daniel
Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210917153047.177141-4-hbathini@linux.ibm.com>
Le 17/09/2021 à 17:30, Hari Bathini a écrit :
> Refactor powerpc JITing. This simplifies adding BPF_PROBE_MEM support.
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>
> Changes in v2:
> * New patch to refactor a bit of JITing code.
>
>
> arch/powerpc/net/bpf_jit_comp32.c | 50 +++++++++++---------
> arch/powerpc/net/bpf_jit_comp64.c | 76 ++++++++++++++++---------------
> 2 files changed, 68 insertions(+), 58 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index b60b59426a24..c8ae14c316e3 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -276,17 +276,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> u32 exit_addr = addrs[flen];
>
> for (i = 0; i < flen; i++) {
> - u32 code = insn[i].code;
> u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg);
> - u32 dst_reg_h = dst_reg - 1;
> u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
> - u32 src_reg_h = src_reg - 1;
> u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
> + u32 true_cond, code = insn[i].code;
> + u32 dst_reg_h = dst_reg - 1;
> + u32 src_reg_h = src_reg - 1;
> + u32 size = BPF_SIZE(code);
> s16 off = insn[i].off;
> s32 imm = insn[i].imm;
> bool func_addr_fixed;
> u64 func_addr;
> - u32 true_cond;
>
> /*
> * addrs[] maps a BPF bytecode address into a real offset from
> @@ -809,25 +809,33 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> /*
> * BPF_LDX
> */
> - case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
> - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> - if (!fp->aux->verifier_zext)
> - EMIT(PPC_RAW_LI(dst_reg_h, 0));
> - break;
> - case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
> - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> - if (!fp->aux->verifier_zext)
> - EMIT(PPC_RAW_LI(dst_reg_h, 0));
> - break;
> - case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
> - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> - if (!fp->aux->verifier_zext)
> + /* dst = *(u8 *)(ul) (src + off) */
> + case BPF_LDX | BPF_MEM | BPF_B:
> + /* dst = *(u16 *)(ul) (src + off) */
> + case BPF_LDX | BPF_MEM | BPF_H:
> + /* dst = *(u32 *)(ul) (src + off) */
> + case BPF_LDX | BPF_MEM | BPF_W:
> + /* dst = *(u64 *)(ul) (src + off) */
> + case BPF_LDX | BPF_MEM | BPF_DW:
You have to add the 'fallthrough;' keyword to avoid build failure with
later versions of GCC.
> + switch (size) {
> + case BPF_B:
> + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> + break;
> + case BPF_H:
> + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> + break;
> + case BPF_W:
> + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> + break;
> + case BPF_DW:
> + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
> + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
> + break;
> + }
> +
> + if ((size != BPF_DW) && !fp->aux->verifier_zext)
> EMIT(PPC_RAW_LI(dst_reg_h, 0));
> break;
> - case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
> - EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
> - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
> - break;
>
> /*
> * Doubleword load
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 2a87da50d9a4..78b28f25555c 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -282,16 +282,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> u32 exit_addr = addrs[flen];
>
> for (i = 0; i < flen; i++) {
> - u32 code = insn[i].code;
> u32 dst_reg = b2p[insn[i].dst_reg];
> u32 src_reg = b2p[insn[i].src_reg];
> + u32 tmp_idx, code = insn[i].code;
> + u32 size = BPF_SIZE(code);
> s16 off = insn[i].off;
> s32 imm = insn[i].imm;
> bool func_addr_fixed;
> - u64 func_addr;
> - u64 imm64;
> + u64 func_addr, imm64;
> u32 true_cond;
> - u32 tmp_idx;
>
> /*
> * addrs[] maps a BPF bytecode address into a real offset from
> @@ -638,35 +637,34 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> */
> case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
> case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
> - if (BPF_CLASS(code) == BPF_ST) {
> - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
> - src_reg = b2p[TMP_REG_1];
> - }
> - EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
> - break;
> case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
> case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
> - if (BPF_CLASS(code) == BPF_ST) {
> - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
> - src_reg = b2p[TMP_REG_1];
> - }
> - EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
> - break;
> case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
> case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
> - if (BPF_CLASS(code) == BPF_ST) {
> - PPC_LI32(b2p[TMP_REG_1], imm);
> - src_reg = b2p[TMP_REG_1];
> - }
> - EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
> - break;
You have to add the 'fallthrough;' keyword to avoid build failure with
later versions of GCC.
> case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
> case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
> if (BPF_CLASS(code) == BPF_ST) {
> - PPC_LI32(b2p[TMP_REG_1], imm);
> + if ((size == BPF_B) || (size == BPF_H))
> + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
> + else /* size == BPF_W || size == BPF_DW */
> + PPC_LI32(b2p[TMP_REG_1], imm);
> src_reg = b2p[TMP_REG_1];
> }
> - PPC_BPF_STL(src_reg, dst_reg, off);
> +
> + switch (size) {
> + case BPF_B:
> + EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
> + break;
> + case BPF_H:
> + EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
> + break;
> + case BPF_W:
> + EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
> + break;
> + case BPF_DW:
> + PPC_BPF_STL(src_reg, dst_reg, off);
> + break;
> + }
> break;
>
> /*
> @@ -716,25 +714,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> */
> /* dst = *(u8 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_B:
> - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> - if (insn_is_zext(&insn[i + 1]))
> - addrs[++i] = ctx->idx * 4;
> - break;
> /* dst = *(u16 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_H:
> - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> - if (insn_is_zext(&insn[i + 1]))
> - addrs[++i] = ctx->idx * 4;
> - break;
> /* dst = *(u32 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_W:
> - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> - if (insn_is_zext(&insn[i + 1]))
> - addrs[++i] = ctx->idx * 4;
> - break;
> /* dst = *(u64 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_DW:
> - PPC_BPF_LL(dst_reg, src_reg, off);
> + switch (size) {
> + case BPF_B:
> + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> + break;
> + case BPF_H:
> + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
> + break;
> + case BPF_W:
> + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
> + break;
> + case BPF_DW:
> + PPC_BPF_LL(dst_reg, src_reg, off);
> + break;
> + }
> +
> + if ((size != BPF_DW) && insn_is_zext(&insn[i + 1]))
> + addrs[++i] = ctx->idx * 4;
> break;
>
> /*
>
^ permalink raw reply
* Re: [PATCH v2 7/8] bpf ppc32: Add BPF_PROBE_MEM support for JIT
From: Christophe Leroy @ 2021-09-17 16:39 UTC (permalink / raw)
To: Hari Bathini, naveen.n.rao, mpe, ast, daniel
Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210917153047.177141-8-hbathini@linux.ibm.com>
Le 17/09/2021 à 17:30, Hari Bathini a écrit :
> BPF load instruction with BPF_PROBE_MEM mode can cause a fault
> inside kernel. Append exception table for such instructions
> within BPF program.
>
> Unlike other archs which uses extable 'fixup' field to pass dest_reg
> and nip, BPF exception table on PowerPC follows the generic PowerPC
> exception table design, where it populates both fixup and extable
> sections within BPF program. fixup section contains 3 instructions,
> first 2 instructions clear dest_reg (lower & higher 32-bit registers)
> and last instruction jumps to next instruction in the BPF code.
> extable 'insn' field contains relative offset of the instruction and
> 'fixup' field contains relative offset of the fixup entry. Example
> layout of BPF program with extable present:
>
> +------------------+
> | |
> | |
> 0x4020 -->| lwz r28,4(r4) |
> | |
> | |
> 0x40ac -->| lwz r3,0(r24) |
> | lwz r4,4(r24) |
> | |
> | |
> |------------------|
> 0x4278 -->| li r28,0 | \
> | li r27,0 | | fixup entry
> | b 0x4024 | /
> 0x4284 -->| li r4,0 |
> | li r3,0 |
> | b 0x40b4 |
> |------------------|
> 0x4290 -->| insn=0xfffffd90 | \ extable entry
> | fixup=0xffffffe4 | /
> 0x4298 -->| insn=0xfffffe14 |
> | fixup=0xffffffe8 |
> +------------------+
>
> (Addresses shown here are chosen random, not real)
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>
> Changes in v2:
> * New patch to add BPF_PROBE_MEM support for PPC32.
>
>
> arch/powerpc/net/bpf_jit.h | 7 +++++
> arch/powerpc/net/bpf_jit_comp.c | 50 +++++++++++++++++++++++++++++++
> arch/powerpc/net/bpf_jit_comp32.c | 30 +++++++++++++++++++
> arch/powerpc/net/bpf_jit_comp64.c | 48 ++---------------------------
> 4 files changed, 89 insertions(+), 46 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 6357c71c26eb..6a591ef88006 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -144,7 +144,11 @@ struct codegen_context {
> unsigned int exentry_idx;
> };
>
> +#ifdef CONFIG_PPC32
> +#define BPF_FIXUP_LEN 12 /* Three instructions */
Could use 3 and 2 instead of 12 and 8, see later why.
> +#else
> #define BPF_FIXUP_LEN 8 /* Two instructions */
> +#endif
>
> static inline void bpf_flush_icache(void *start, void *end)
> {
> @@ -174,6 +178,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
> void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
> void bpf_jit_realloc_regs(struct codegen_context *ctx);
>
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> + int insn_idx, int jmp_off, int dst_reg);
> +
> #endif
>
> #endif
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index e92bd79d3bac..a1753b8c78c8 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -271,3 +271,53 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>
> return fp;
> }
> +
> +/*
> + * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
> + * this function, as this only applies to BPF_PROBE_MEM, for now.
> + */
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> + int insn_idx, int jmp_off, int dst_reg)
> +{
Modify patch 5 to get that function already in
arch/powerpc/net/bpf_jit_comp.c, so that only changes/additions to the
function appear here.
And you can have the prototype ready for the final version in patch 5
instead of adding new arguments here and having to change ppc64 call site.
And in fact you can use them already in patch 5, like jmp_off.
> + off_t offset;
> + unsigned long pc;
> + struct exception_table_entry *ex;
> + u32 *fixup;
> +
> + /* Populate extable entries only in the last pass */
> + if (pass != 2)
> + return 0;
> +
> + if (!fp->aux->extable ||
> + WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
> + return -EINVAL;
> +
> + pc = (unsigned long)&image[insn_idx];
> +
> + fixup = (void *)fp->aux->extable -
> + (fp->aux->num_exentries * BPF_FIXUP_LEN) +
> + (ctx->exentry_idx * BPF_FIXUP_LEN);
Use 2 or 3 for BPF_FIXUP_LEN and multiply by 4 here.
> +
> + fixup[0] = PPC_RAW_LI(dst_reg, 0);
> +#ifdef CONFIG_PPC32
You should use 'if (IS_ENABLED(CONFIG_PPC32)' instead of a #ifdef here.
> + fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */
> + fixup[2] = PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[2]);
> +#else
> + fixup[1] = PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[1]);
> +#endif
Use 2 or 3 for BPF_FIXUP_LEN and you can do
if (IS_ENABLED(CONFIG_PPC32)
fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register
too */
fixup[BPF_FIXUP_LEN - 1] = PPC_RAW_BRANCH((long)(pc + jmp_off) -
(long)&fixup[BPF_FIXUP_LEN - 1]);
> +
> + ex = &fp->aux->extable[ctx->exentry_idx];
> +
> + offset = pc - (long)&ex->insn;
> + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
> + return -ERANGE;
> + ex->insn = offset;
> +
> + offset = (long)fixup - (long)&ex->fixup;
> + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
> + return -ERANGE;
> + ex->fixup = offset;
> +
> + ctx->exentry_idx++;
> + return 0;
> +}
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 94641b7be387..c6262289dcc4 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -811,12 +811,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> */
> /* dst = *(u8 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_B:
> + case BPF_LDX | BPF_PROBE_MEM | BPF_B:
> /* dst = *(u16 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_H:
> + case BPF_LDX | BPF_PROBE_MEM | BPF_H:
> /* dst = *(u32 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_W:
> + case BPF_LDX | BPF_PROBE_MEM | BPF_W:
> /* dst = *(u64 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_DW:
> + case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
> switch (size) {
> case BPF_B:
> EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
> @@ -835,6 +839,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>
> if ((size != BPF_DW) && !fp->aux->verifier_zext)
> EMIT(PPC_RAW_LI(dst_reg_h, 0));
> +
> + if (BPF_MODE(code) == BPF_PROBE_MEM) {
> + int insn_idx = ctx->idx - 1;
> + int jmp_off = 4;
> +
> + /*
> + * In case of BPF_DW, two lwz instructions are emitted, one
> + * for higher 32-bit and another for lower 32-bit. So, set
> + * ex->insn to the first of the two and jump over both
> + * instructions in fixup.
> + *
> + * Similarly, with !verifier_zext, two instructions are
> + * emitted for BPF_B/H/W case. So, set ex-insn to the
> + * instruction that could fault and skip over both
> + * instructions.
> + */
> + if ((size == BPF_DW) || !fp->aux->verifier_zext) {
No need of ( ) around 'size == BPF_DW'
> + insn_idx -= 1;
> + jmp_off += 4;
> + }
> +
> + ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
> + jmp_off, dst_reg);
> + if (ret)
> + return ret;
> + }
> break;
>
> /*
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index eb28dbc67151..10cc9f04843c 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -270,51 +270,6 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
> /* out: */
> }
>
> -/*
> - * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
> - * this function, as this only applies to BPF_PROBE_MEM, for now.
> - */
> -static int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass,
> - struct codegen_context *ctx, int dst_reg)
> -{
> - off_t offset;
> - unsigned long pc;
> - struct exception_table_entry *ex;
> - u32 *fixup;
> -
> - /* Populate extable entries only in the last pass */
> - if (pass != 2)
> - return 0;
> -
> - if (!fp->aux->extable ||
> - WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
> - return -EINVAL;
> -
> - pc = (unsigned long)&image[ctx->idx - 1];
> -
> - fixup = (void *)fp->aux->extable -
> - (fp->aux->num_exentries * BPF_FIXUP_LEN) +
> - (ctx->exentry_idx * BPF_FIXUP_LEN);
> -
> - fixup[0] = PPC_RAW_LI(dst_reg, 0);
> - fixup[1] = PPC_RAW_BRANCH((long)(pc + 4) - (long)&fixup[1]);
> -
> - ex = &fp->aux->extable[ctx->exentry_idx];
> -
> - offset = pc - (long)&ex->insn;
> - if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
> - return -ERANGE;
> - ex->insn = offset;
> -
> - offset = (long)fixup - (long)&ex->fixup;
> - if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
> - return -ERANGE;
> - ex->fixup = offset;
> -
> - ctx->exentry_idx++;
> - return 0;
> -}
> -
> /* Assemble the body code between the prologue & epilogue */
> int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> u32 *addrs, int pass)
> @@ -811,7 +766,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> addrs[++i] = ctx->idx * 4;
>
> if (BPF_MODE(code) == BPF_PROBE_MEM) {
> - ret = bpf_add_extable_entry(fp, image, pass, ctx, dst_reg);
> + ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
> + 4, dst_reg);
Make that ready in patch 5 so that you don't need to change here in patch 7.
> if (ret)
> return ret;
> }
>
^ permalink raw reply
* Re: [PATCH v2 6/8] bpf ppc64: Add addr > TASK_SIZE_MAX explicit check
From: Christophe Leroy @ 2021-09-17 16:50 UTC (permalink / raw)
To: Hari Bathini, naveen.n.rao, mpe, ast, daniel
Cc: Ravi Bangoria, songliubraving, netdev, john.fastabend, andrii,
kpsingh, paulus, yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210917153047.177141-7-hbathini@linux.ibm.com>
Le 17/09/2021 à 17:30, Hari Bathini a écrit :
> From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
>
> On PPC64 with KUAP enabled, any kernel code which wants to
> access userspace needs to be surrounded by disable-enable KUAP.
> But that is not happening for BPF_PROBE_MEM load instruction.
> So, when BPF program tries to access invalid userspace address,
> page-fault handler considers it as bad KUAP fault:
>
> Kernel attempted to read user page (d0000000) - exploit attempt? (uid: 0)
>
> Considering the fact that PTR_TO_BTF_ID (which uses BPF_PROBE_MEM
> mode) could either be a valid kernel pointer or NULL but should
> never be a pointer to userspace address, execute BPF_PROBE_MEM load
> only if addr > TASK_SIZE_MAX, otherwise set dst_reg=0 and move on.
You should do like copy_from_kernel_nofault_allowed() and use the same
criterias as is_kernel_addr() instead of using TASK_SIZE_MAX.
>
> This will catch NULL, valid or invalid userspace pointers. Only bad
> kernel pointer will be handled by BPF exception table.
>
> [Alexei suggested for x86]
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>
> Changes in v2:
> * Refactored the code based on Christophe's comments.
>
>
> arch/powerpc/net/bpf_jit_comp64.c | 23 +++++++++++++++++++++++
> 1 file changed, 23 insertions(+)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 2fc10995f243..eb28dbc67151 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -769,6 +769,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> /* dst = *(u64 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_DW:
> case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
> + /*
> + * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
> + * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
> + * load only if addr > TASK_SIZE_MAX, otherwise set dst_reg=0 and move on.
> + */
> + if (BPF_MODE(code) == BPF_PROBE_MEM) {
> + unsigned int adjusted_idx;
> +
> + /*
> + * Check if 'off' is word aligned because PPC_BPF_LL()
> + * (BPF_DW case) generates two instructions if 'off' is not
> + * word-aligned and one instruction otherwise.
> + */
> + adjusted_idx = ((BPF_SIZE(code) == BPF_DW) && (off & 3)) ? 1 : 0;
No need of ( ) around 'BPF_SIZE(code) == BPF_DW'
> +
> + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off));
> + PPC_LI64(b2p[TMP_REG_2], TASK_SIZE_MAX);
> + EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2]));
> + PPC_BCC(COND_GT, (ctx->idx + 4) * 4);
> + EMIT(PPC_RAW_LI(dst_reg, 0));
> + PPC_JMP((ctx->idx + 2 + adjusted_idx) * 4);
I think it would be more explicit if you drop adjusted_idx and do :
if (BPF_SIZE(code) == BPF_DW) && (off & 3)
PPC_JMP((ctx->idx + 3) * 4);
else
PPC_JMP((ctx->idx + 2) * 4);
> + }
> +
> switch (size) {
> case BPF_B:
> EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
>
^ permalink raw reply
* Re: [PATCH v2 8/8] bpf ppc32: Add addr > TASK_SIZE_MAX explicit check
From: Christophe Leroy @ 2021-09-17 16:57 UTC (permalink / raw)
To: Hari Bathini, naveen.n.rao, mpe, ast, daniel
Cc: songliubraving, netdev, john.fastabend, andrii, kpsingh, paulus,
yhs, bpf, linuxppc-dev, kafai
In-Reply-To: <20210917153047.177141-9-hbathini@linux.ibm.com>
Le 17/09/2021 à 17:30, Hari Bathini a écrit :
> With KUAP enabled, any kernel code which wants to access userspace
> needs to be surrounded by disable-enable KUAP. But that is not
> happening for BPF_PROBE_MEM load instruction. Though PPC32 does not
> support read protection, considering the fact that PTR_TO_BTF_ID
> (which uses BPF_PROBE_MEM mode) could either be a valid kernel pointer
> or NULL but should never be a pointer to userspace address, execute
> BPF_PROBE_MEM load only if addr > TASK_SIZE_MAX, otherwise set
> dst_reg=0 and move on.
Same comment as patch 6.
>
> This will catch NULL, valid or invalid userspace pointers. Only bad
> kernel pointer will be handled by BPF exception table.
>
> [Alexei suggested for x86]
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>
> Changes in v2:
> * New patch to handle bad userspace pointers on PPC32.
>
>
> arch/powerpc/net/bpf_jit_comp32.c | 39 +++++++++++++++++++++++++++++++
> 1 file changed, 39 insertions(+)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index c6262289dcc4..faa8047fcf4a 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -821,6 +821,45 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
> /* dst = *(u64 *)(ul) (src + off) */
> case BPF_LDX | BPF_MEM | BPF_DW:
> case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
> + /*
> + * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
> + * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
> + * load only if addr > TASK_SIZE_MAX, otherwise set dst_reg=0 and move on.
> + */
> + if (BPF_MODE(code) == BPF_PROBE_MEM) {
> + bool extra_insn_needed = false;
> + unsigned int adjusted_idx;
> +
> + /*
> + * For BPF_DW case, "li reg_h,0" would be needed when
> + * !fp->aux->verifier_zext. Adjust conditional branch'ing
> + * address accordingly.
> + */
> + if ((size == BPF_DW) && !fp->aux->verifier_zext)
> + extra_insn_needed = true;
Don't make it too complicated. That's a fallback that should never
happen, no need to optimise. You can put that instruction all the time
(or put a NOP) and keep the jumps always the same.
> +
> + /*
> + * Need to jump two instructions instead of one for BPF_DW case
> + * as there are two load instructions for dst_reg_h & dst_reg
> + * respectively.
> + */
> + adjusted_idx = (size == BPF_DW) ? 1 : 0;
Same comment as patch 6, drop adjusted_idx and do an if/else directly
for the PPC_JMP.
> +
> + EMIT(PPC_RAW_ADDI(b2p[TMP_REG], src_reg, off));
> + PPC_LI32(_R0, TASK_SIZE_MAX);
> + EMIT(PPC_RAW_CMPLW(b2p[TMP_REG], _R0));
> + PPC_BCC(COND_GT, (ctx->idx + 4 + (extra_insn_needed ? 1 : 0)) * 4);
> + EMIT(PPC_RAW_LI(dst_reg, 0));
> + /*
> + * Note that "li reg_h,0" is emitted for BPF_B/H/W case,
> + * if necessary. So, jump there insted of emitting an
> + * additional "li reg_h,0" instruction.
> + */
> + if (extra_insn_needed)
> + EMIT(PPC_RAW_LI(dst_reg_h, 0));
> + PPC_JMP((ctx->idx + 2 + adjusted_idx) * 4);
> + }
> +
> switch (size) {
> case BPF_B:
> EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
>
^ permalink raw reply
* [powerpc:fixes-test] BUILD SUCCESS c006a06508db4841d256d82f42da392d6391f3d9
From: kernel test robot @ 2021-09-17 17:46 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: c006a06508db4841d256d82f42da392d6391f3d9 powerpc/xics: Set the IRQ chip data for the ICS native backend
elapsed time: 1508m
configs tested: 138
configs skipped: 3
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arm defconfig
arm64 allyesconfig
arm64 defconfig
arm allyesconfig
arm allmodconfig
i386 randconfig-c001-20210916
sh rsk7269_defconfig
powerpc ep8248e_defconfig
riscv nommu_virt_defconfig
powerpc chrp32_defconfig
arm bcm2835_defconfig
arm at91_dt_defconfig
s390 allyesconfig
arm hisi_defconfig
sh se7780_defconfig
arm qcom_defconfig
nios2 10m50_defconfig
arc nsimosci_hs_smp_defconfig
mips ar7_defconfig
arm netwinder_defconfig
sh landisk_defconfig
um x86_64_defconfig
arm mvebu_v7_defconfig
arc nsimosci_defconfig
mips loongson1b_defconfig
sh se7712_defconfig
powerpc warp_defconfig
arm integrator_defconfig
powerpc mpc837x_rdb_defconfig
powerpc tqm8541_defconfig
powerpc klondike_defconfig
powerpc tqm8548_defconfig
powerpc kmeter1_defconfig
powerpc mpc885_ads_defconfig
arm cm_x300_defconfig
sh alldefconfig
m68k amcore_defconfig
arm oxnas_v6_defconfig
ia64 tiger_defconfig
powerpc katmai_defconfig
arm mini2440_defconfig
arm trizeps4_defconfig
xtensa cadence_csp_defconfig
arm alldefconfig
powerpc powernv_defconfig
mips rm200_defconfig
mips cu1000-neo_defconfig
arm ep93xx_defconfig
xtensa audio_kc705_defconfig
xtensa iss_defconfig
sh defconfig
x86_64 allyesconfig
powerpc storcenter_defconfig
arm keystone_defconfig
arm ezx_defconfig
mips ip27_defconfig
arm imx_v6_v7_defconfig
arm am200epdkit_defconfig
m68k mac_defconfig
mips nlm_xlr_defconfig
x86_64 randconfig-c001-20210916
arm randconfig-c002-20210916
ia64 allmodconfig
ia64 allyesconfig
ia64 defconfig
m68k allmodconfig
m68k allyesconfig
m68k defconfig
nios2 defconfig
nds32 allnoconfig
arc allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
xtensa allyesconfig
parisc allyesconfig
parisc defconfig
s390 allmodconfig
s390 defconfig
sparc allyesconfig
sparc defconfig
i386 defconfig
i386 allyesconfig
mips allyesconfig
mips allmodconfig
powerpc allyesconfig
powerpc allnoconfig
powerpc allmodconfig
x86_64 randconfig-a016-20210916
x86_64 randconfig-a013-20210916
x86_64 randconfig-a012-20210916
x86_64 randconfig-a011-20210916
x86_64 randconfig-a014-20210916
x86_64 randconfig-a015-20210916
i386 randconfig-a016-20210916
i386 randconfig-a015-20210916
i386 randconfig-a011-20210916
i386 randconfig-a012-20210916
i386 randconfig-a013-20210916
i386 randconfig-a014-20210916
riscv randconfig-r042-20210916
s390 randconfig-r044-20210916
arc randconfig-r043-20210916
riscv nommu_k210_defconfig
riscv allnoconfig
riscv defconfig
riscv rv32_defconfig
riscv allyesconfig
riscv allmodconfig
x86_64 rhel-8.3-kselftests
um i386_defconfig
x86_64 defconfig
x86_64 rhel-8.3
x86_64 kexec
clang tested configs:
riscv randconfig-c006-20210916
x86_64 randconfig-c007-20210916
mips randconfig-c004-20210916
powerpc randconfig-c003-20210916
arm randconfig-c002-20210916
i386 randconfig-c001-20210916
s390 randconfig-c005-20210916
x86_64 randconfig-a002-20210916
x86_64 randconfig-a003-20210916
x86_64 randconfig-a006-20210916
x86_64 randconfig-a004-20210916
x86_64 randconfig-a005-20210916
x86_64 randconfig-a001-20210916
i386 randconfig-a004-20210916
i386 randconfig-a005-20210916
i386 randconfig-a006-20210916
i386 randconfig-a002-20210916
i386 randconfig-a003-20210916
i386 randconfig-a001-20210916
hexagon randconfig-r045-20210916
hexagon randconfig-r041-20210916
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [PATCH 3/3] powerpc/inst: Define ppc_inst_t as u32 on PPC32
From: Christophe Leroy @ 2021-09-17 18:37 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <9607dfbecab2ecccb712bbd25d2d5da882239d4c.1631903846.git.christophe.leroy@csgroup.eu>
Unlike PPC64 ABI, PPC32 uses the stack to pass a parameter defined
as a struct, even when the struct has a single simple element.
To avoid that, define ppc_inst_t as u32 on PPC32.
Keep it as 'struct ppc_inst' when __CHECKER__ is defined so that
sparse can perform type checking.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/include/asm/inst.h | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index c4775f4c93a4..2f8d189e4d76 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -41,6 +41,7 @@ struct ppc_inst {
#endif
} __packed;
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
typedef struct ppc_inst ppc_inst_t;
static inline u32 ppc_inst_val(ppc_inst_t x)
@@ -48,13 +49,23 @@ static inline u32 ppc_inst_val(ppc_inst_t x)
return x.val;
}
+#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
+
+#else
+typedef u32 ppc_inst_t;
+
+static inline u32 ppc_inst_val(ppc_inst_t x)
+{
+ return x;
+}
+#define ppc_inst(x) (x)
+#endif
+
static inline int ppc_inst_primary_opcode(ppc_inst_t x)
{
return ppc_inst_val(x) >> 26;
}
-#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
-
#ifdef CONFIG_PPC64
#define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) })
--
2.31.1
^ permalink raw reply related
* [PATCH 1/3] powerpc/inst: Refactor ___get_user_instr()
From: Christophe Leroy @ 2021-09-17 18:37 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
PPC64 version of ___get_user_instr() can be used for PPC32 as well,
by simply disabling the suffix part with IS_ENABLED(CONFIG_PPC64).
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/include/asm/inst.h | 11 +----------
1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index b11c0e2f9639..fea4d46155a9 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -4,8 +4,6 @@
#include <asm/ppc-opcode.h>
-#ifdef CONFIG_PPC64
-
#define ___get_user_instr(gu_op, dest, ptr) \
({ \
long __gui_ret; \
@@ -16,7 +14,7 @@
__chk_user_ptr(ptr); \
__gui_ret = gu_op(__prefix, __gui_ptr); \
if (__gui_ret == 0) { \
- if ((__prefix >> 26) == OP_PREFIX) { \
+ if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \
__gui_ret = gu_op(__suffix, __gui_ptr + 1); \
__gui_inst = ppc_inst_prefix(__prefix, __suffix); \
} else { \
@@ -27,13 +25,6 @@
} \
__gui_ret; \
})
-#else /* !CONFIG_PPC64 */
-#define ___get_user_instr(gu_op, dest, ptr) \
-({ \
- __chk_user_ptr(ptr); \
- gu_op((dest).val, (u32 __user *)(ptr)); \
-})
-#endif /* CONFIG_PPC64 */
#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr)
--
2.31.1
^ permalink raw reply related
* [PATCH 2/3] powerpc/inst: Define ppc_inst_t
From: Christophe Leroy @ 2021-09-17 18:37 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <9607dfbecab2ecccb712bbd25d2d5da882239d4c.1631903846.git.christophe.leroy@csgroup.eu>
In order to stop using 'struct ppc_inst' on PPC32,
define a ppc_inst_t typedef.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/include/asm/code-patching.h | 18 +++----
arch/powerpc/include/asm/hw_breakpoint.h | 4 +-
arch/powerpc/include/asm/inst.h | 34 ++++++------
arch/powerpc/include/asm/sstep.h | 4 +-
arch/powerpc/kernel/align.c | 4 +-
arch/powerpc/kernel/epapr_paravirt.c | 2 +-
arch/powerpc/kernel/hw_breakpoint.c | 4 +-
.../kernel/hw_breakpoint_constraints.c | 4 +-
arch/powerpc/kernel/kprobes.c | 4 +-
arch/powerpc/kernel/mce_power.c | 2 +-
arch/powerpc/kernel/optprobes.c | 4 +-
arch/powerpc/kernel/process.c | 2 +-
arch/powerpc/kernel/setup_32.c | 2 +-
arch/powerpc/kernel/trace/ftrace.c | 54 +++++++++----------
arch/powerpc/kernel/vecemu.c | 2 +-
arch/powerpc/lib/code-patching.c | 38 ++++++-------
arch/powerpc/lib/feature-fixups.c | 4 +-
arch/powerpc/lib/sstep.c | 4 +-
arch/powerpc/lib/test_emulate_step.c | 10 ++--
arch/powerpc/mm/maccess.c | 2 +-
arch/powerpc/perf/8xx-pmu.c | 2 +-
arch/powerpc/xmon/xmon.c | 14 ++---
arch/powerpc/xmon/xmon_bpts.h | 4 +-
23 files changed, 112 insertions(+), 110 deletions(-)
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index a95f63788c6b..a4ba9a5982fc 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -23,20 +23,20 @@
#define BRANCH_ABSOLUTE 0x2
bool is_offset_in_branch_range(long offset);
-int create_branch(struct ppc_inst *instr, const u32 *addr,
+int create_branch(ppc_inst_t *instr, const u32 *addr,
unsigned long target, int flags);
-int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
unsigned long target, int flags);
int patch_branch(u32 *addr, unsigned long target, int flags);
-int patch_instruction(u32 *addr, struct ppc_inst instr);
-int raw_patch_instruction(u32 *addr, struct ppc_inst instr);
+int patch_instruction(u32 *addr, ppc_inst_t instr);
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
static inline unsigned long patch_site_addr(s32 *site)
{
return (unsigned long)site + *site;
}
-static inline int patch_instruction_site(s32 *site, struct ppc_inst instr)
+static inline int patch_instruction_site(s32 *site, ppc_inst_t instr)
{
return patch_instruction((u32 *)patch_site_addr(site), instr);
}
@@ -57,11 +57,11 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned
return modify_instruction((unsigned int *)patch_site_addr(site), clr, set);
}
-int instr_is_relative_branch(struct ppc_inst instr);
-int instr_is_relative_link_branch(struct ppc_inst instr);
+int instr_is_relative_branch(ppc_inst_t instr);
+int instr_is_relative_link_branch(ppc_inst_t instr);
unsigned long branch_target(const u32 *instr);
-int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src);
-extern bool is_conditional_branch(struct ppc_inst instr);
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src);
+extern bool is_conditional_branch(ppc_inst_t instr);
#ifdef CONFIG_PPC_BOOK3E_64
void __patch_exception(int exc, unsigned long addr);
#define patch_exception(exc, name) do { \
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index abebfbee5b1c..88053d3c68e6 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -56,11 +56,11 @@ static inline int nr_wp_slots(void)
return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1;
}
-bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
unsigned long ea, int type, int size,
struct arch_hw_breakpoint *info);
-void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
int *type, int *size, unsigned long *ea);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index fea4d46155a9..c4775f4c93a4 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -8,7 +8,7 @@
({ \
long __gui_ret; \
u32 __user *__gui_ptr = (u32 __user *)ptr; \
- struct ppc_inst __gui_inst; \
+ ppc_inst_t __gui_inst; \
unsigned int __prefix, __suffix; \
\
__chk_user_ptr(ptr); \
@@ -41,22 +41,24 @@ struct ppc_inst {
#endif
} __packed;
-static inline u32 ppc_inst_val(struct ppc_inst x)
+typedef struct ppc_inst ppc_inst_t;
+
+static inline u32 ppc_inst_val(ppc_inst_t x)
{
return x.val;
}
-static inline int ppc_inst_primary_opcode(struct ppc_inst x)
+static inline int ppc_inst_primary_opcode(ppc_inst_t x)
{
return ppc_inst_val(x) >> 26;
}
-#define ppc_inst(x) ((struct ppc_inst){ .val = (x) })
+#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
#ifdef CONFIG_PPC64
-#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) })
+#define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) })
-static inline u32 ppc_inst_suffix(struct ppc_inst x)
+static inline u32 ppc_inst_suffix(ppc_inst_t x)
{
return x.suffix;
}
@@ -64,14 +66,14 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x)
#else
#define ppc_inst_prefix(x, y) ppc_inst(x)
-static inline u32 ppc_inst_suffix(struct ppc_inst x)
+static inline u32 ppc_inst_suffix(ppc_inst_t x)
{
return 0;
}
#endif /* CONFIG_PPC64 */
-static inline struct ppc_inst ppc_inst_read(const u32 *ptr)
+static inline ppc_inst_t ppc_inst_read(const u32 *ptr)
{
if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX)
return ppc_inst_prefix(*ptr, *(ptr + 1));
@@ -79,17 +81,17 @@ static inline struct ppc_inst ppc_inst_read(const u32 *ptr)
return ppc_inst(*ptr);
}
-static inline bool ppc_inst_prefixed(struct ppc_inst x)
+static inline bool ppc_inst_prefixed(ppc_inst_t x)
{
return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX;
}
-static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
+static inline ppc_inst_t ppc_inst_swab(ppc_inst_t x)
{
return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x)));
}
-static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
+static inline bool ppc_inst_equal(ppc_inst_t x, ppc_inst_t y)
{
if (ppc_inst_val(x) != ppc_inst_val(y))
return false;
@@ -98,7 +100,7 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
return ppc_inst_suffix(x) == ppc_inst_suffix(y);
}
-static inline int ppc_inst_len(struct ppc_inst x)
+static inline int ppc_inst_len(ppc_inst_t x)
{
return ppc_inst_prefixed(x) ? 8 : 4;
}
@@ -109,14 +111,14 @@ static inline int ppc_inst_len(struct ppc_inst x)
*/
static inline u32 *ppc_inst_next(u32 *location, u32 *value)
{
- struct ppc_inst tmp;
+ ppc_inst_t tmp;
tmp = ppc_inst_read(value);
return (void *)location + ppc_inst_len(tmp);
}
-static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x)
+static inline unsigned long ppc_inst_as_ulong(ppc_inst_t x)
{
if (IS_ENABLED(CONFIG_PPC32))
return ppc_inst_val(x);
@@ -128,7 +130,7 @@ static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x)
#define PPC_INST_STR_LEN sizeof("00000000 00000000")
-static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x)
+static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x)
{
if (ppc_inst_prefixed(x))
sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x));
@@ -145,6 +147,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins
__str; \
})
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src);
+int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src);
#endif /* _ASM_POWERPC_INST_H */
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 1df867c2e054..9940f077d581 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -145,7 +145,7 @@ union vsx_reg {
* otherwise.
*/
extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
- struct ppc_inst instr);
+ ppc_inst_t instr);
/*
* Emulate an instruction that can be executed just by updating
@@ -162,7 +162,7 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op);
* 0 if it could not be emulated, or -1 for an instruction that
* should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.).
*/
-extern int emulate_step(struct pt_regs *regs, struct ppc_inst instr);
+extern int emulate_step(struct pt_regs *regs, ppc_inst_t instr);
/*
* Emulate a load or store instruction by reading/writing the
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index bbb4181621dd..c0c9ce1dbfa6 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -105,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = {
* so we don't need the address swizzling.
*/
static int emulate_spe(struct pt_regs *regs, unsigned int reg,
- struct ppc_inst ppc_instr)
+ ppc_inst_t ppc_instr)
{
union {
u64 ll;
@@ -300,7 +300,7 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
int fix_alignment(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct instruction_op op;
int r, type;
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 93b0f3ec8fb0..d4b8aff20815 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -37,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
return -1;
for (i = 0; i < (len / 4); i++) {
- struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i]));
+ ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i]));
patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
patch_instruction(epapr_ev_idle_start + i, inst);
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 91a3be14808b..2669f80b3a49 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -523,7 +523,7 @@ static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info
static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
struct arch_hw_breakpoint **info, int *hit,
- struct ppc_inst instr)
+ ppc_inst_t instr)
{
int i;
int stepped;
@@ -616,7 +616,7 @@ int hw_breakpoint_handler(struct die_args *args)
int hit[HBP_NUM_MAX] = {0};
int nr_hit = 0;
bool ptrace_bp = false;
- struct ppc_inst instr = ppc_inst(0);
+ ppc_inst_t instr = ppc_inst(0);
int type = 0;
int size = 0;
unsigned long ea;
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
index 675d1f66ab72..484d285c4b86 100644
--- a/arch/powerpc/kernel/hw_breakpoint_constraints.c
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -80,7 +80,7 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type,
* Return true if the event is valid wrt dawr configuration,
* including extraneous exception. Otherwise return false.
*/
-bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
unsigned long ea, int type, int size,
struct arch_hw_breakpoint *info)
{
@@ -136,7 +136,7 @@ static int cache_op_size(void)
#endif
}
-void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
int *type, int *size, unsigned long *ea)
{
struct instruction_op op;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 7a7cd6bda53e..89173338e2fa 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -124,7 +124,7 @@ int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
struct kprobe *prev;
- struct ppc_inst insn = ppc_inst_read(p->addr);
+ ppc_inst_t insn = ppc_inst_read(p->addr);
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
@@ -244,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
int ret;
- struct ppc_inst insn = ppc_inst_read(p->ainsn.insn);
+ ppc_inst_t insn = ppc_inst_read(p->ainsn.insn);
/* regs->nip is also adjusted if emulate_step returns 1 */
ret = emulate_step(regs, insn);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index c2f55fe7092d..f2dd4daeddf8 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -455,7 +455,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
* in real-mode is tricky and can lead to recursive
* faults
*/
- struct ppc_inst instr;
+ ppc_inst_t instr;
unsigned long pfn, instr_addr;
struct instruction_op op;
struct pt_regs tmp = *regs;
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index c79899abcec8..04b540c12800 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -153,7 +153,7 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
- struct ppc_inst branch_op_callback, branch_emulate_step, temp;
+ ppc_inst_t branch_op_callback, branch_emulate_step, temp;
unsigned long op_callback_addr, emulate_step_addr;
kprobe_opcode_t *buff;
long b_offset;
@@ -269,7 +269,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
void arch_optimize_kprobes(struct list_head *oplist)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct optimized_kprobe *op;
struct optimized_kprobe *tmp;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 50436b52c213..bcb7bfaea011 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -628,7 +628,7 @@ static void do_break_handler(struct pt_regs *regs)
{
struct arch_hw_breakpoint null_brk = {0};
struct arch_hw_breakpoint *info;
- struct ppc_inst instr = ppc_inst(0);
+ ppc_inst_t instr = ppc_inst(0);
int type = 0;
int size = 0;
unsigned long ea;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 7ec5c47fce0e..4017b05ef643 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE);
notrace void __init machine_init(u64 dt_ptr)
{
u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache);
- struct ppc_inst insn;
+ ppc_inst_t insn;
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index d89c5df4f206..f293294ef5da 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -41,10 +41,10 @@
#define NUM_FTRACE_TRAMPS 8
static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
-static struct ppc_inst
+static ppc_inst_t
ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
{
- struct ppc_inst op;
+ ppc_inst_t op;
addr = ppc_function_entry((void *)addr);
@@ -55,9 +55,9 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
}
static int
-ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
+ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
{
- struct ppc_inst replaced;
+ ppc_inst_t replaced;
/*
* Note:
@@ -90,24 +90,24 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
*/
static int test_24bit_addr(unsigned long ip, unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
addr = ppc_function_entry((void *)addr);
/* use the create_branch to verify that this offset can be branched */
return create_branch(&op, (u32 *)ip, addr, 0) == 0;
}
-static int is_bl_op(struct ppc_inst op)
+static int is_bl_op(ppc_inst_t op)
{
return (ppc_inst_val(op) & 0xfc000003) == 0x48000001;
}
-static int is_b_op(struct ppc_inst op)
+static int is_b_op(ppc_inst_t op)
{
return (ppc_inst_val(op) & 0xfc000003) == 0x48000000;
}
-static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op)
+static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
{
int offset;
@@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod,
{
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
- struct ppc_inst op, pop;
+ ppc_inst_t op, pop;
/* read where this goes */
if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
@@ -221,7 +221,7 @@ static int
__ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
unsigned int jmp[4];
unsigned long ip = rec->ip;
unsigned long tramp;
@@ -291,7 +291,7 @@ __ftrace_make_nop(struct module *mod,
static unsigned long find_ftrace_tramp(unsigned long ip)
{
int i;
- struct ppc_inst instr;
+ ppc_inst_t instr;
/*
* We have the compiler generated long_branch tramps at the end
@@ -329,9 +329,9 @@ static int add_ftrace_tramp(unsigned long tramp)
static int setup_mcount_compiler_tramp(unsigned long tramp)
{
int i;
- struct ppc_inst op;
+ ppc_inst_t op;
unsigned long ptr;
- struct ppc_inst instr;
+ ppc_inst_t instr;
static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
/* Is this a known long jump tramp? */
@@ -396,7 +396,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long tramp, ip = rec->ip;
- struct ppc_inst op;
+ ppc_inst_t op;
/* Read where this goes */
if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
@@ -436,7 +436,7 @@ int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more that 24 bits away,
@@ -489,7 +489,7 @@ int ftrace_make_nop(struct module *mod,
*/
#ifndef CONFIG_MPROFILE_KERNEL
static int
-expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
+expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
{
/*
* We expect to see:
@@ -507,7 +507,7 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
}
#else
static int
-expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
+expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
{
/* look for patched "NOP" on ppc64 with -mprofile-kernel */
if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())))
@@ -519,8 +519,8 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op[2];
- struct ppc_inst instr;
+ ppc_inst_t op[2];
+ ppc_inst_t instr;
void *ip = (void *)rec->ip;
unsigned long entry, ptr, tramp;
struct module *mod = rec->arch.mod;
@@ -588,7 +588,7 @@ static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
int err;
- struct ppc_inst op;
+ ppc_inst_t op;
u32 *ip = (u32 *)rec->ip;
/* read where this goes */
@@ -626,7 +626,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
void *ip = (void *)rec->ip;
unsigned long tramp, entry, ptr;
@@ -674,7 +674,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more that 24 bits away,
@@ -713,7 +713,7 @@ static int
__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
unsigned long ip = rec->ip;
unsigned long entry, ptr, tramp;
struct module *mod = rec->arch.mod;
@@ -807,7 +807,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more that 24 bits away,
@@ -847,7 +847,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
int ret;
old = ppc_inst_read((u32 *)&ftrace_call);
@@ -932,7 +932,7 @@ int ftrace_enable_ftrace_graph_caller(void)
unsigned long ip = (unsigned long)(&ftrace_graph_call);
unsigned long addr = (unsigned long)(&ftrace_graph_caller);
unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
old = ftrace_call_replace(ip, stub, 0);
new = ftrace_call_replace(ip, addr, 0);
@@ -945,7 +945,7 @@ int ftrace_disable_ftrace_graph_caller(void)
unsigned long ip = (unsigned long)(&ftrace_graph_call);
unsigned long addr = (unsigned long)(&ftrace_graph_caller);
unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
old = ftrace_call_replace(ip, addr, 0);
new = ftrace_call_replace(ip, stub, 0);
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index ae632569446f..fd9432875ebc 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -261,7 +261,7 @@ static unsigned int rfin(unsigned int x)
int emulate_altivec(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
unsigned int i, word;
unsigned int va, vb, vc, vd;
vector128 *vrs;
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index f9a3019e37b4..d1fd21d254a1 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -18,7 +18,7 @@
#include <asm/setup.h>
#include <asm/inst.h>
-static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr)
+static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
{
if (!ppc_inst_prefixed(instr)) {
u32 val = ppc_inst_val(instr);
@@ -39,7 +39,7 @@ static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch
return -EFAULT;
}
-int raw_patch_instruction(u32 *addr, struct ppc_inst instr)
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
{
return __patch_instruction(addr, instr, addr);
}
@@ -141,7 +141,7 @@ static inline int unmap_patch_area(unsigned long addr)
return 0;
}
-static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
+static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
int err;
u32 *patch_addr = NULL;
@@ -180,14 +180,14 @@ static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
}
#else /* !CONFIG_STRICT_KERNEL_RWX */
-static int do_patch_instruction(u32 *addr, struct ppc_inst instr)
+static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
return raw_patch_instruction(addr, instr);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
-int patch_instruction(u32 *addr, struct ppc_inst instr)
+int patch_instruction(u32 *addr, ppc_inst_t instr)
{
/* Make sure we aren't patching a freed init section */
if (init_mem_is_free && init_section_contains(addr, 4)) {
@@ -200,7 +200,7 @@ NOKPROBE_SYMBOL(patch_instruction);
int patch_branch(u32 *addr, unsigned long target, int flags)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
create_branch(&instr, addr, target, flags);
return patch_instruction(addr, instr);
@@ -232,7 +232,7 @@ bool is_offset_in_branch_range(long offset)
* Helper to check if a given instruction is a conditional branch
* Derived from the conditional checks in analyse_instr()
*/
-bool is_conditional_branch(struct ppc_inst instr)
+bool is_conditional_branch(ppc_inst_t instr)
{
unsigned int opcode = ppc_inst_primary_opcode(instr);
@@ -250,7 +250,7 @@ bool is_conditional_branch(struct ppc_inst instr)
}
NOKPROBE_SYMBOL(is_conditional_branch);
-int create_branch(struct ppc_inst *instr, const u32 *addr,
+int create_branch(ppc_inst_t *instr, const u32 *addr,
unsigned long target, int flags)
{
long offset;
@@ -270,7 +270,7 @@ int create_branch(struct ppc_inst *instr, const u32 *addr,
return 0;
}
-int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
unsigned long target, int flags)
{
long offset;
@@ -289,22 +289,22 @@ int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
return 0;
}
-static unsigned int branch_opcode(struct ppc_inst instr)
+static unsigned int branch_opcode(ppc_inst_t instr)
{
return ppc_inst_primary_opcode(instr) & 0x3F;
}
-static int instr_is_branch_iform(struct ppc_inst instr)
+static int instr_is_branch_iform(ppc_inst_t instr)
{
return branch_opcode(instr) == 18;
}
-static int instr_is_branch_bform(struct ppc_inst instr)
+static int instr_is_branch_bform(ppc_inst_t instr)
{
return branch_opcode(instr) == 16;
}
-int instr_is_relative_branch(struct ppc_inst instr)
+int instr_is_relative_branch(ppc_inst_t instr)
{
if (ppc_inst_val(instr) & BRANCH_ABSOLUTE)
return 0;
@@ -312,7 +312,7 @@ int instr_is_relative_branch(struct ppc_inst instr)
return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
}
-int instr_is_relative_link_branch(struct ppc_inst instr)
+int instr_is_relative_link_branch(ppc_inst_t instr)
{
return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
}
@@ -359,7 +359,7 @@ unsigned long branch_target(const u32 *instr)
return 0;
}
-int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src)
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
{
unsigned long target;
target = branch_target(src);
@@ -412,7 +412,7 @@ static void __init test_trampoline(void)
static void __init test_branch_iform(void)
{
int err;
- struct ppc_inst instr;
+ ppc_inst_t instr;
u32 tmp[2];
u32 *iptr = tmp;
unsigned long addr = (unsigned long)tmp;
@@ -494,7 +494,7 @@ static void __init test_create_function_call(void)
{
u32 *iptr;
unsigned long dest;
- struct ppc_inst instr;
+ ppc_inst_t instr;
/* Check we can create a function call */
iptr = (u32 *)ppc_function_entry(test_trampoline);
@@ -508,7 +508,7 @@ static void __init test_branch_bform(void)
{
int err;
unsigned long addr;
- struct ppc_inst instr;
+ ppc_inst_t instr;
u32 tmp[2];
u32 *iptr = tmp;
unsigned int flags;
@@ -586,7 +586,7 @@ static void __init test_translate_branch(void)
{
unsigned long addr;
void *p, *q;
- struct ppc_inst instr;
+ ppc_inst_t instr;
void *buf;
buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index cda17bee5afe..e4502ab949ae 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -47,7 +47,7 @@ static u32 *calc_addr(struct fixup_entry *fcur, long offset)
static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end)
{
int err;
- struct ppc_inst instr;
+ ppc_inst_t instr;
instr = ppc_inst_read(src);
@@ -613,7 +613,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
static void do_final_fixups(void)
{
#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
- struct ppc_inst inst;
+ ppc_inst_t inst;
u32 *src, *dest, *end;
if (PHYSICAL_START == 0)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index d8d5f901cee1..da8cd6a21ae2 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1271,7 +1271,7 @@ static nokprobe_inline int trap_compare(long v1, long v2)
* otherwise.
*/
int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
- struct ppc_inst instr)
+ ppc_inst_t instr)
{
#ifdef CONFIG_PPC64
unsigned int suffixopcode, prefixtype, prefix_r;
@@ -3495,7 +3495,7 @@ NOKPROBE_SYMBOL(emulate_loadstore);
* or -1 if the instruction is one that should not be stepped,
* such as an rfid, or a mtmsrd that would clear MSR_RI.
*/
-int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
+int emulate_step(struct pt_regs *regs, ppc_inst_t instr)
{
struct instruction_op op;
int r, err, type;
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 8b4f6b3e96c4..4f141daafcff 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -792,7 +792,7 @@ static void __init test_lxvpx_stxvpx(void)
#ifdef CONFIG_VSX
static void __init test_plxvp_pstxvp(void)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct pt_regs regs;
union {
vector128 a;
@@ -906,7 +906,7 @@ struct compute_test {
struct {
char *descr;
unsigned long flags;
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct pt_regs regs;
} subtests[MAX_SUBTESTS + 1];
};
@@ -1600,7 +1600,7 @@ static struct compute_test compute_tests[] = {
};
static int __init emulate_compute_instr(struct pt_regs *regs,
- struct ppc_inst instr,
+ ppc_inst_t instr,
bool negative)
{
int analysed;
@@ -1627,7 +1627,7 @@ static int __init emulate_compute_instr(struct pt_regs *regs,
}
static int __init execute_compute_instr(struct pt_regs *regs,
- struct ppc_inst instr)
+ ppc_inst_t instr)
{
extern int exec_instr(struct pt_regs *regs);
@@ -1658,7 +1658,7 @@ static void __init run_tests_compute(void)
struct compute_test *test;
struct pt_regs *regs, exp, got;
unsigned int i, j, k;
- struct ppc_inst instr;
+ ppc_inst_t instr;
bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative;
for (i = 0; i < ARRAY_SIZE(compute_tests); i++) {
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
index aad7c47e0030..5abae96b2b46 100644
--- a/arch/powerpc/mm/maccess.c
+++ b/arch/powerpc/mm/maccess.c
@@ -12,7 +12,7 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
return is_kernel_addr((unsigned long)unsafe_src);
}
-int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src)
+int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
{
unsigned int val, suffix;
int err;
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index f970d1510d3d..4738c4dbf567 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -153,7 +153,7 @@ static void mpc8xx_pmu_read(struct perf_event *event)
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{
- struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));
+ ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));
mpc8xx_pmu_read(event);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index dd8241c009e5..da9e89485d7a 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -125,7 +125,7 @@ static unsigned bpinstr = 0x7fe00008; /* trap */
static int cmds(struct pt_regs *);
static int mread(unsigned long, void *, int);
static int mwrite(unsigned long, void *, int);
-static int mread_instr(unsigned long, struct ppc_inst *);
+static int mread_instr(unsigned long, ppc_inst_t *);
static int handle_fault(struct pt_regs *);
static void byterev(unsigned char *, int);
static void memex(void);
@@ -908,7 +908,7 @@ static struct bpt *new_breakpoint(unsigned long a)
static void insert_bpts(void)
{
int i;
- struct ppc_inst instr, instr2;
+ ppc_inst_t instr, instr2;
struct bpt *bp, *bp2;
bp = bpts;
@@ -988,7 +988,7 @@ static void remove_bpts(void)
{
int i;
struct bpt *bp;
- struct ppc_inst instr;
+ ppc_inst_t instr;
bp = bpts;
for (i = 0; i < NBPTS; ++i, ++bp) {
@@ -1204,7 +1204,7 @@ static int do_step(struct pt_regs *regs)
*/
static int do_step(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
int stepped;
force_enable_xmon();
@@ -1459,7 +1459,7 @@ csum(void)
*/
static long check_bp_loc(unsigned long addr)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
addr &= ~3;
if (!is_kernel_addr(addr)) {
@@ -2300,7 +2300,7 @@ mwrite(unsigned long adrs, void *buf, int size)
}
static int
-mread_instr(unsigned long adrs, struct ppc_inst *instr)
+mread_instr(unsigned long adrs, ppc_inst_t *instr)
{
volatile int n;
@@ -3020,7 +3020,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
{
int nr, dotted;
unsigned long first_adr;
- struct ppc_inst inst, last_inst = ppc_inst(0);
+ ppc_inst_t inst, last_inst = ppc_inst(0);
dotted = 0;
for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) {
diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h
index 57e6fb03de48..377068f52edb 100644
--- a/arch/powerpc/xmon/xmon_bpts.h
+++ b/arch/powerpc/xmon/xmon_bpts.h
@@ -5,8 +5,8 @@
#define NBPTS 256
#ifndef __ASSEMBLY__
#include <asm/inst.h>
-#define BPT_SIZE (sizeof(struct ppc_inst) * 2)
-#define BPT_WORDS (BPT_SIZE / sizeof(struct ppc_inst))
+#define BPT_SIZE (sizeof(ppc_inst_t) * 2)
+#define BPT_WORDS (BPT_SIZE / sizeof(ppc_inst_t))
extern unsigned int bpt_table[NBPTS * BPT_WORDS];
#endif /* __ASSEMBLY__ */
--
2.31.1
^ permalink raw reply related
* Re: [PATCH v5 6/6] sched/fair: Consider SMT in ASYM_PACKING load balance
From: Peter Zijlstra @ 2021-09-17 18:46 UTC (permalink / raw)
To: Vincent Guittot
Cc: Juri Lelli, Aubrey Li, Srikar Dronamraju, Ravi V. Shankar,
Ricardo Neri, Ben Segall, Srinivas Pandruvada,
Joel Fernandes (Google), Ingo Molnar, Rafael J . Wysocki,
Steven Rostedt, Mel Gorman, Len Brown, Ricardo Neri,
Nicholas Piggin, Aubrey Li, Dietmar Eggemann, Tim Chen,
Quentin Perret, linuxppc-dev, linux-kernel,
Daniel Bristot de Oliveira
In-Reply-To: <CAKfTPtCh72m86pbT5vY_rWzU79Q9NP9t6mcrO8ewbZkBJdN03Q@mail.gmail.com>
On Fri, Sep 17, 2021 at 05:25:02PM +0200, Vincent Guittot wrote:
> With the removal of the condition !sds->local_stat.sum_nr_running
> which seems useless because dst_cpu is idle and not SMT, this patch
> looks good to me
I've made it look like this. Thanks!
---
Subject: sched/fair: Consider SMT in ASYM_PACKING load balance
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 10 Sep 2021 18:18:19 -0700
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
When deciding to pull tasks in ASYM_PACKING, it is necessary not only to
check for the idle state of the destination CPU, dst_cpu, but also of
its SMT siblings.
If dst_cpu is idle but its SMT siblings are busy, performance suffers
if it pulls tasks from a medium priority CPU that does not have SMT
siblings.
Implement asym_smt_can_pull_tasks() to inspect the state of the SMT
siblings of both dst_cpu and the CPUs in the candidate busiest group.
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Reviewed-by: Len Brown <len.brown@intel.com>
Link: https://lkml.kernel.org/r/20210911011819.12184-7-ricardo.neri-calderon@linux.intel.com
---
kernel/sched/fair.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 92 insertions(+)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8538,10 +8538,96 @@ group_type group_classify(unsigned int i
return group_has_spare;
}
+/**
+ * asym_smt_can_pull_tasks - Check whether the load balancing CPU can pull tasks
+ * @dst_cpu: Destination CPU of the load balancing
+ * @sds: Load-balancing data with statistics of the local group
+ * @sgs: Load-balancing statistics of the candidate busiest group
+ * @sg: The candidate busiest group
+ *
+ * Check the state of the SMT siblings of both @sds::local and @sg and decide
+ * if @dst_cpu can pull tasks.
+ *
+ * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
+ * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
+ * only if @dst_cpu has higher priority.
+ *
+ * If both @dst_cpu and @sg have SMT siblings, and @sg has exactly one more
+ * busy CPU than @sds::local, let @dst_cpu pull tasks if it has higher priority.
+ * Bigger imbalances in the number of busy CPUs will be dealt with in
+ * update_sd_pick_busiest().
+ *
+ * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
+ * of @dst_cpu are idle and @sg has lower priority.
+ */
+static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
+ struct sg_lb_stats *sgs,
+ struct sched_group *sg)
+{
+#ifdef CONFIG_SCHED_SMT
+ bool local_is_smt, sg_is_smt;
+ int sg_busy_cpus;
+
+ local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
+ sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
+
+ sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
+
+ if (!local_is_smt) {
+ /*
+ * If we are here, @dst_cpu is idle and does not have SMT
+ * siblings. Pull tasks if candidate group has two or more
+ * busy CPUs.
+ */
+ if (sg_busy_cpus >= 2) /* implies sg_is_smt */
+ return true;
+
+ /*
+ * @dst_cpu does not have SMT siblings. @sg may have SMT
+ * siblings and only one is busy. In such case, @dst_cpu
+ * can help if it has higher priority and is idle (i.e.,
+ * it has no running tasks).
+ */
+ return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+ }
+
+ /* @dst_cpu has SMT siblings. */
+
+ if (sg_is_smt) {
+ int local_busy_cpus = sds->local->group_weight -
+ sds->local_stat.idle_cpus;
+ int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
+
+ if (busy_cpus_delta == 1)
+ return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+
+ return false;
+ }
+
+ /*
+ * @sg does not have SMT siblings. Ensure that @sds::local does not end
+ * up with more than one busy SMT sibling and only pull tasks if there
+ * are not busy CPUs (i.e., no CPU has running tasks).
+ */
+ if (!sds->local_stat.sum_nr_running)
+ return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+
+ return false;
+#else
+ /* Always return false so that callers deal with non-SMT cases. */
+ return false;
+#endif
+}
+
static inline bool
sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
struct sched_group *group)
{
+ /* Only do SMT checks if either local or candidate have SMT siblings */
+ if ((sds->local->flags & SD_SHARE_CPUCAPACITY) ||
+ (group->flags & SD_SHARE_CPUCAPACITY))
+ return asym_smt_can_pull_tasks(env->dst_cpu, sds, sgs, group);
+
return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
}
@@ -9547,6 +9633,12 @@ static struct rq *find_busiest_queue(str
nr_running == 1)
continue;
+ /* Make sure we only pull tasks from a CPU of lower priority */
+ if ((env->sd->flags & SD_ASYM_PACKING) &&
+ sched_asym_prefer(i, env->dst_cpu) &&
+ nr_running == 1)
+ continue;
+
switch (env->migration_type) {
case migrate_load:
/*
^ permalink raw reply
* Re: [PATCH v2] powerpc/32: Don't use a struct based type for pte_t
From: Michael Ellerman @ 2021-09-18 3:26 UTC (permalink / raw)
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <c904599f33aaf6bb7ee2836a9ff8368509e0d78d.1631887042.git.christophe.leroy@csgroup.eu>
Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> Long time ago we had a config item called STRICT_MM_TYPECHECKS
> to build the kernel with pte_t defined as a structure in order
> to perform additional build checks or build it with pte_t
> defined as a simple type in order to get simpler generated code.
>
> Commit 670eea924198 ("powerpc/mm: Always use STRICT_MM_TYPECHECKS")
> made the struct based definition the only one, considering that the
> generated code was similar in both cases.
>
> That's right on ppc64 because the ABI is such that the content of a
> struct having a single simple type element is passed as register,
> but on ppc32 such a structure is passed via the stack like any
> structure.
>
> Simple test function:
>
> pte_t test(pte_t pte)
> {
> return pte;
> }
>
> Before this patch we get
>
> c00108ec <test>:
> c00108ec: 81 24 00 00 lwz r9,0(r4)
> c00108f0: 91 23 00 00 stw r9,0(r3)
> c00108f4: 4e 80 00 20 blr
>
> So, for PPC32, restore the simple type behaviour we got before
> commit 670eea924198, but instead of adding a config option to
> activate type check, do it when __CHECKER__ is set so that type
> checking is performed by 'sparse' and provides feedback like:
>
> arch/powerpc/mm/pgtable.c:466:16: warning: incorrect type in return expression (different base types)
> arch/powerpc/mm/pgtable.c:466:16: expected unsigned long
> arch/powerpc/mm/pgtable.c:466:16: got struct pte_t [usertype] x
OK that's a good trade off.
One question below ...
> diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
> index d11b4c61d686..c60199fc6fa6 100644
> --- a/arch/powerpc/include/asm/pgtable-types.h
> +++ b/arch/powerpc/include/asm/pgtable-types.h
> @@ -5,14 +5,26 @@
> /* PTE level */
> #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
> typedef struct { pte_basic_t pte, pte1, pte2, pte3; } pte_t;
> -#else
> +#elif defined(__CHECKER__) || !defined(CONFIG_PPC32)
It would be nicer if this logic was in Kconfig.
eg. restore config STRICT_MM_TYPECHECKS but make it always enabled for
64-bit, and depend on CHECKER for 32-bit.
The only thing is I'm not sure if we can test __CHECKER__ in Kconfig?
cheers
^ permalink raw reply
* Re: [PATCH v2] powerpc/32: Don't use a struct based type for pte_t
From: Christophe Leroy @ 2021-09-18 8:37 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <87tuiiimwu.fsf@mpe.ellerman.id.au>
Le 18/09/2021 à 05:26, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Long time ago we had a config item called STRICT_MM_TYPECHECKS
>> to build the kernel with pte_t defined as a structure in order
>> to perform additional build checks or build it with pte_t
>> defined as a simple type in order to get simpler generated code.
>>
>> Commit 670eea924198 ("powerpc/mm: Always use STRICT_MM_TYPECHECKS")
>> made the struct based definition the only one, considering that the
>> generated code was similar in both cases.
>>
>> That's right on ppc64 because the ABI is such that the content of a
>> struct having a single simple type element is passed as register,
>> but on ppc32 such a structure is passed via the stack like any
>> structure.
>>
>> Simple test function:
>>
>> pte_t test(pte_t pte)
>> {
>> return pte;
>> }
>>
>> Before this patch we get
>>
>> c00108ec <test>:
>> c00108ec: 81 24 00 00 lwz r9,0(r4)
>> c00108f0: 91 23 00 00 stw r9,0(r3)
>> c00108f4: 4e 80 00 20 blr
>>
>> So, for PPC32, restore the simple type behaviour we got before
>> commit 670eea924198, but instead of adding a config option to
>> activate type check, do it when __CHECKER__ is set so that type
>> checking is performed by 'sparse' and provides feedback like:
>>
>> arch/powerpc/mm/pgtable.c:466:16: warning: incorrect type in return expression (different base types)
>> arch/powerpc/mm/pgtable.c:466:16: expected unsigned long
>> arch/powerpc/mm/pgtable.c:466:16: got struct pte_t [usertype] x
>
> OK that's a good trade off.
>
> One question below ...
>
>> diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
>> index d11b4c61d686..c60199fc6fa6 100644
>> --- a/arch/powerpc/include/asm/pgtable-types.h
>> +++ b/arch/powerpc/include/asm/pgtable-types.h
>> @@ -5,14 +5,26 @@
>> /* PTE level */
>> #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
>> typedef struct { pte_basic_t pte, pte1, pte2, pte3; } pte_t;
>> -#else
>> +#elif defined(__CHECKER__) || !defined(CONFIG_PPC32)
>
> It would be nicer if this logic was in Kconfig.
>
> eg. restore config STRICT_MM_TYPECHECKS but make it always enabled for
> 64-bit, and depend on CHECKER for 32-bit.
>
> The only thing is I'm not sure if we can test __CHECKER__ in Kconfig?
I think Kconfig doesn't see __CHECKER__, otherwise it would mean that a
build may get different whether you build with C=1/2 or not.
__CHECKER__ is a define added by sparse when doing the CHECK on a per
object basis.
What I can do is to add:
#if defined(__CHECKER__) || !defined(CONFIG_PPC32)
#define STRICT_MM_TYPECHECKS
#endif
Christophe
^ permalink raw reply
* Re: [PATCH v2] powerpc/32: Don't use a struct based type for pte_t
From: Christophe Leroy @ 2021-09-18 8:47 UTC (permalink / raw)
To: David Laight, Benjamin Herrenschmidt, Paul Mackerras,
Michael Ellerman
Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org
In-Reply-To: <505920070e5f4bf8ad7ccaa12f346469@AcuMS.aculab.com>
Le 17/09/2021 à 16:32, David Laight a écrit :
> From: Christophe Leroy
>> Sent: 17 September 2021 14:58
>>
>> Long time ago we had a config item called STRICT_MM_TYPECHECKS
>> to build the kernel with pte_t defined as a structure in order
>> to perform additional build checks or build it with pte_t
>> defined as a simple type in order to get simpler generated code.
>>
> ...
>> diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
>> index d11b4c61d686..c60199fc6fa6 100644
>> --- a/arch/powerpc/include/asm/pgtable-types.h
>> +++ b/arch/powerpc/include/asm/pgtable-types.h
>> @@ -5,14 +5,26 @@
>> /* PTE level */
>> #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
>> typedef struct { pte_basic_t pte, pte1, pte2, pte3; } pte_t;
>> -#else
>> +#elif defined(__CHECKER__) || !defined(CONFIG_PPC32)
>> typedef struct { pte_basic_t pte; } pte_t;
>> +#else
>> +typedef pte_basic_t pte_t;
>> #endif
>> +
>> +#if defined(__CHECKER__) || !defined(CONFIG_PPC32) || \
>> + (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
>> #define __pte(x) ((pte_t) { (x) })
>> static inline pte_basic_t pte_val(pte_t x)
>> {
>> return x.pte;
>> }
>> +#else
>> +#define __pte(x) ((pte_t)(x))
>> +static inline pte_basic_t pte_val(pte_t x)
>> +{
>> + return x;
>> +}
>> +#endif
>
> Would it be better to define:
> static inline pte_basic_*pte_basic(pte_t *x)
> {
> #if xxx
> return x;
> #else
> return &x->pte;
> #endif
> }
>
> Then pte_val(x) is always *pt_basic(x)
> and the casts like:
>
>> - pte_basic_t *entry = &ptep->pte;
>> + pte_basic_t *entry = (pte_basic_t *)ptep;
>
> can go away.
>
I don't like that idea too much, because it means going through a
pointer of something which is not in memory at the begining. Doing that
forces GCC to put the pte_t object on stack. And that's exactly the
purpose of this patch: avoid having to go via memory. Allthough recent
versions of GCC optimise it away at the end, I don't like it in principle.
And the only two places (pte_update() and set_huge_pte_at() where this
cast is required are really two places very special which deal with real
page tables. So IMHO it makes sense to explicitely show that what we use
is the address of the entry in the page table.
Christophe
^ permalink raw reply
* [PATCH] powerpc/476: Fix sparse report
From: Christophe Leroy @ 2021-09-18 9:22 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
Cc: Alistair Popple, linuxppc-dev, linux-kernel, kernel test robot
arch/powerpc/platforms/44x/ppc476.c:236:17: warning: cast removes address space '__iomem' of expression
arch/powerpc/platforms/44x/ppc476.c:241:34: warning: incorrect type in argument 1 (different address spaces)
arch/powerpc/platforms/44x/ppc476.c:241:34: expected void const volatile [noderef] __iomem *addr
arch/powerpc/platforms/44x/ppc476.c:241:34: got unsigned char [usertype] *
arch/powerpc/platforms/44x/ppc476.c:243:17: warning: incorrect type in argument 1 (different address spaces)
arch/powerpc/platforms/44x/ppc476.c:243:17: expected void volatile [noderef] __iomem *addr
arch/powerpc/platforms/44x/ppc476.c:243:17: got unsigned char [usertype] *[assigned] fpga
Mark 'fpga' pointer as __iomem.
Reported-by: kernel test robot <lkp@intel.com>
Fixes: ab9a4183fddf ("powerpc: Update currituck pci/usb fixup for new board revision")
Cc: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/platforms/44x/ppc476.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c
index 07f7e3ce67b5..fb7db5cedd4e 100644
--- a/arch/powerpc/platforms/44x/ppc476.c
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -219,7 +219,7 @@ static int board_rev = -1;
static int __init ppc47x_get_board_rev(void)
{
int reg;
- u8 *fpga;
+ u8 __iomem *fpga;
struct device_node *np = NULL;
if (of_machine_is_compatible("ibm,currituck")) {
@@ -233,7 +233,7 @@ static int __init ppc47x_get_board_rev(void)
if (!np)
goto fail;
- fpga = (u8 *) of_iomap(np, 0);
+ fpga = of_iomap(np, 0);
of_node_put(np);
if (!fpga)
goto fail;
--
2.31.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox