All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next] bpf: Fold LSH and ARSH pair to a single MOVSX for sign-extension
@ 2024-04-29 15:20 Xu Kuohai
  2024-04-29 16:32 ` Yonghong Song
  0 siblings, 1 reply; 2+ messages in thread
From: Xu Kuohai @ 2024-04-29 15:20 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa

From: Xu Kuohai <xukuohai@huawei.com>

As shown in the ExpandSEXTINREG function in [1], LLVM generates an SHL
and SRA instruction pair to implement sign-extension. For x86 and arm64,
this instruction pair is folded into a single instruction, but the
LLVM BPF backend does not do such folding.

For example, the following C code:

long f(int x)
{
	return x;
}

will be compiled to:

r0 = r1
r0 <<= 0x20
r0 s>>= 0x20
exit

Since 32-bit to 64-bit sign-extension is a common case and we already
have the MOVSX instruction for sign-extension, this patch tries to fold
the 32-bit to 64-bit LSH and ARSH pair into a single MOVSX instruction.

[1] https://github.com/llvm/llvm-project/blob/4523a267829c807f3fc8fab8e5e9613985a51565/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp#L1228

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 include/linux/filter.h |  8 ++++++++
 kernel/bpf/verifier.c  | 46 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7a27f19bf44d..7cc90a32ed9a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -173,6 +173,14 @@ struct ctl_table_header;
 		.off   = 0,					\
 		.imm   = 0 })
 
+#define BPF_MOV64_SEXT_REG(DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 #define BPF_MOV32_REG(DST, SRC)					\
 	((struct bpf_insn) {					\
 		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4e474ef44e9c..6bcee052d90d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20659,6 +20659,49 @@ static int optimize_bpf_loop(struct bpf_verifier_env *env)
 	return 0;
 }
 
+static bool is_sext32(struct bpf_insn *insn1, struct bpf_insn *insn2)
+{
+	if (insn1->code != (BPF_ALU64 | BPF_K | BPF_LSH) || insn1->imm != 32)
+		return false;
+
+	if (insn2->code != (BPF_ALU64 | BPF_K | BPF_ARSH) || insn2->imm != 32)
+		return false;
+
+	if (insn1->dst_reg != insn2->dst_reg)
+		return false;
+
+	return true;
+}
+
+/* LLVM generates sign-extension with LSH and ARSH pair, replace it with MOVSX.
+ *
+ * Before:
+ * DST <<= 32
+ * DST s>>= 32
+ *
+ * After:
+ * DST = (s32)DST
+ */
+static int optimize_sext32_insns(struct bpf_verifier_env *env)
+{
+	int i, err;
+	int insn_cnt = env->prog->len;
+	struct bpf_insn *insn = env->prog->insnsi;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (i + 1 >= insn_cnt || !is_sext32(insn, insn + 1))
+			continue;
+		/* patch current insn to MOVSX */
+		*insn = BPF_MOV64_SEXT_REG(insn->dst_reg, insn->dst_reg, 32);
+		/* remove next insn */
+		err = verifier_remove_insns(env, i + 1, 1);
+		if (err)
+			return err;
+		insn_cnt--;
+	}
+	return 0;
+}
+
 static void free_states(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state_list *sl, *sln;
@@ -21577,6 +21620,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret == 0)
 		ret = optimize_bpf_loop(env);
 
+	if (ret == 0)
+		ret = optimize_sext32_insns(env);
+
 	if (is_priv) {
 		if (ret == 0)
 			opt_hard_wire_dead_code_branches(env);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH bpf-next] bpf: Fold LSH and ARSH pair to a single MOVSX for sign-extension
  2024-04-29 15:20 [PATCH bpf-next] bpf: Fold LSH and ARSH pair to a single MOVSX for sign-extension Xu Kuohai
@ 2024-04-29 16:32 ` Yonghong Song
  0 siblings, 0 replies; 2+ messages in thread
From: Yonghong Song @ 2024-04-29 16:32 UTC (permalink / raw)
  To: Xu Kuohai, bpf
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, KP Singh,
	Stanislav Fomichev, Hao Luo, Jiri Olsa


On 4/29/24 8:20 AM, Xu Kuohai wrote:
> From: Xu Kuohai <xukuohai@huawei.com>
>
> As shown in the ExpandSEXTINREG function in [1], LLVM generates SRL and
> SRA instruction pair to implement sign-extension. For x86 and arm64,
> this instruction pair will be folded to a single instruction, but the
> LLVM BPF backend does not do such folding.

With -mcpu=v4, a sign-extension instruction will be generated, and the
selftest test_progs-cpuv4 already tests with -mcpu=v4. The cpu v4 support
was added in llvm18, and I hope that once llvm18 is widely available, we
might be able to make test_progs-cpuv4 the default test_progs.

So I think this optimization is not needed.

>
> For example, the following C code:
>
> long f(int x)
> {
> 	return x;
> }
>
> will be compiled to:
>
> r0 = r1
> r0 <<= 0x20
> r0 s>>= 0x20
> exit
>
> Since 32-bit to 64-bit sign-extension is a common case and we already
> have MOVSX instruction for sign-extension, this patch tries to fold
> the 32-bit to 64-bit LSH and ARSH pair to a single MOVSX instruction.
>
> [1] https://github.com/llvm/llvm-project/blob/4523a267829c807f3fc8fab8e5e9613985a51565/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp#L1228
>
> Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
> ---
>   include/linux/filter.h |  8 ++++++++
>   kernel/bpf/verifier.c  | 46 ++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 54 insertions(+)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 7a27f19bf44d..7cc90a32ed9a 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -173,6 +173,14 @@ struct ctl_table_header;
>   		.off   = 0,					\
>   		.imm   = 0 })
>   
> +#define BPF_MOV64_SEXT_REG(DST, SRC, OFF)			\
> +	((struct bpf_insn) {					\
> +		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
> +		.dst_reg = DST,					\
> +		.src_reg = SRC,					\
> +		.off   = OFF,					\
> +		.imm   = 0 })
> +
>   #define BPF_MOV32_REG(DST, SRC)					\
>   	((struct bpf_insn) {					\
>   		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 4e474ef44e9c..6bcee052d90d 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20659,6 +20659,49 @@ static int optimize_bpf_loop(struct bpf_verifier_env *env)
>   	return 0;
>   }
>   
> +static bool is_sext32(struct bpf_insn *insn1, struct bpf_insn *insn2)
> +{
> +	if (insn1->code != (BPF_ALU64 | BPF_K | BPF_LSH) || insn1->imm != 32)
> +		return false;
> +
> +	if (insn2->code != (BPF_ALU64 | BPF_K | BPF_ARSH) || insn2->imm != 32)
> +		return false;
> +
> +	if (insn1->dst_reg != insn2->dst_reg)
> +		return false;
> +
> +	return true;
> +}
> +
> +/* LLVM generates sign-extension with LSH and ARSH pair, replace it with MOVSX.
> + *
> + * Before:
> + * DST <<= 32
> + * DST s>>= 32
> + *
> + * After:
> + * DST = (s32)DST
> + */
> +static int optimize_sext32_insns(struct bpf_verifier_env *env)
> +{
> +	int i, err;
> +	int insn_cnt = env->prog->len;
> +	struct bpf_insn *insn = env->prog->insnsi;
> +
> +	for (i = 0; i < insn_cnt; i++, insn++) {
> +		if (i + 1 >= insn_cnt || !is_sext32(insn, insn + 1))
> +			continue;
> +		/* patch current insn to MOVSX */
> +		*insn = BPF_MOV64_SEXT_REG(insn->dst_reg, insn->dst_reg, 32);
> +		/* remove next insn */
> +		err = verifier_remove_insns(env, i + 1, 1);
> +		if (err)
> +			return err;
> +		insn_cnt--;
> +	}
> +	return 0;
> +}
> +
>   static void free_states(struct bpf_verifier_env *env)
>   {
>   	struct bpf_verifier_state_list *sl, *sln;
> @@ -21577,6 +21620,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
>   	if (ret == 0)
>   		ret = optimize_bpf_loop(env);
>   
> +	if (ret == 0)
> +		ret = optimize_sext32_insns(env);
> +
>   	if (is_priv) {
>   		if (ret == 0)
>   			opt_hard_wire_dead_code_branches(env);

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-04-29 16:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-29 15:20 [PATCH bpf-next] bpf: Fold LSH and ARSH pair to a single MOVSX for sign-extension Xu Kuohai
2024-04-29 16:32 ` Yonghong Song

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.