* [PATCH net-next 0/4] Further ARM BPF jit compiler improvements
@ 2018-07-12 20:50 Russell King - ARM Linux
2018-07-12 20:50 ` [PATCH net-next 1/4] ARM: net: bpf: improve 64-bit load immediate implementation Russell King
` (4 more replies)
0 siblings, 5 replies; 6+ messages in thread
From: Russell King - ARM Linux @ 2018-07-12 20:50 UTC (permalink / raw)
To: netdev, linux-arm-kernel; +Cc: Daniel Borkmann
Four further jit compiler improvements for 32-bit ARM.
arch/arm/net/bpf_jit_32.c | 120 ++++++++++++++++++++++++++++------------------
1 file changed, 73 insertions(+), 47 deletions(-)
--
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 13.8Mbps down 630kbps up
According to speedtest.net: 13Mbps down 490kbps up
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH net-next 1/4] ARM: net: bpf: improve 64-bit load immediate implementation
2018-07-12 20:50 [PATCH net-next 0/4] Further ARM BPF jit compiler improvements Russell King - ARM Linux
@ 2018-07-12 20:50 ` Russell King
2018-07-12 20:50 ` [PATCH net-next 2/4] ARM: net: bpf: improve 64-bit sign-extended immediate load Russell King
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Russell King @ 2018-07-12 20:50 UTC (permalink / raw)
To: netdev, linux-arm-kernel; +Cc: Daniel Borkmann
Rather than writing each 32-bit half of the 64-bit immediate value
separately when the register is on the stack:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
str r6, [fp, #-44] ; 0xffffffd4
mov r6, #0
str r6, [fp, #-40] ; 0xffffffd8
arrange to use the double-word store when available instead:
movw r6, #45056 ; 0xb000
movt r6, #60979 ; 0xee33
mov r7, #0
strd r6, [fp, #-44] ; 0xffffffd4
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
arch/arm/net/bpf_jit_32.c | 32 ++++++++++++++++++++------------
1 file changed, 20 insertions(+), 12 deletions(-)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index a9f68a924800..6558bd73bbb9 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -599,9 +599,20 @@ static inline void emit_a32_mov_i(const s8 dst, const u32 val,
}
}
+static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx)
+{
+ const s8 *tmp = bpf2a32[TMP_REG_1];
+ const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
+
+ emit_mov_i(rd[1], (u32)val, ctx);
+ emit_mov_i(rd[0], val >> 32, ctx);
+
+ arm_bpf_put_reg64(dst, rd, ctx);
+}
+
/* Sign extended move */
-static inline void emit_a32_mov_i64(const bool is64, const s8 dst[],
- const u32 val, struct jit_ctx *ctx) {
+static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[],
+ const u32 val, struct jit_ctx *ctx) {
u32 hi = 0;
if (is64 && (val & (1<<31)))
@@ -1309,7 +1320,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
case BPF_K:
/* Sign-extend immediate value to destination reg */
- emit_a32_mov_i64(is64, dst, imm, ctx);
+ emit_a32_mov_se_i64(is64, dst, imm, ctx);
break;
}
break;
@@ -1358,7 +1369,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
* value into temporary reg and then it would be
* safe to do the operation on it.
*/
- emit_a32_mov_i64(is64, tmp2, imm, ctx);
+ emit_a32_mov_se_i64(is64, tmp2, imm, ctx);
emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code));
break;
}
@@ -1454,7 +1465,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
* reg then it would be safe to do the operation
* on it.
*/
- emit_a32_mov_i64(is64, tmp2, imm, ctx);
+ emit_a32_mov_se_i64(is64, tmp2, imm, ctx);
emit_a32_mul_r64(dst, tmp2, ctx);
break;
}
@@ -1506,12 +1517,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* dst = imm64 */
case BPF_LD | BPF_IMM | BPF_DW:
{
- const struct bpf_insn insn1 = insn[1];
- u32 hi, lo = imm;
+ u64 val = (u32)imm | (u64)insn[1].imm << 32;
- hi = insn1.imm;
- emit_a32_mov_i(dst_lo, lo, ctx);
- emit_a32_mov_i(dst_hi, hi, ctx);
+ emit_a32_mov_i64(dst, val, ctx);
return 1;
}
@@ -1531,7 +1539,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
switch (BPF_SIZE(code)) {
case BPF_DW:
/* Sign-extend immediate value into temp reg */
- emit_a32_mov_i64(true, tmp2, imm, ctx);
+ emit_a32_mov_se_i64(true, tmp2, imm, ctx);
emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_W);
emit_str_r(dst_lo, tmp2[0], off+4, ctx, BPF_W);
break;
@@ -1620,7 +1628,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
rm = tmp2[0];
rn = tmp2[1];
/* Sign-extend immediate value */
- emit_a32_mov_i64(true, tmp2, imm, ctx);
+ emit_a32_mov_se_i64(true, tmp2, imm, ctx);
go_jmp:
/* Setup destination register */
rd = arm_bpf_get_reg64(dst, tmp, ctx);
--
2.7.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH net-next 2/4] ARM: net: bpf: improve 64-bit sign-extended immediate load
2018-07-12 20:50 [PATCH net-next 0/4] Further ARM BPF jit compiler improvements Russell King - ARM Linux
2018-07-12 20:50 ` [PATCH net-next 1/4] ARM: net: bpf: improve 64-bit load immediate implementation Russell King
@ 2018-07-12 20:50 ` Russell King
2018-07-12 20:50 ` [PATCH net-next 3/4] ARM: net: bpf: improve 64-bit store implementation Russell King
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Russell King @ 2018-07-12 20:50 UTC (permalink / raw)
To: netdev, linux-arm-kernel; +Cc: Daniel Borkmann
Improve the 64-bit sign-extended immediate load from:
mov r6, #1
str r6, [fp, #-52] ; 0xffffffcc
mov r6, #0
str r6, [fp, #-48] ; 0xffffffd0
to:
mov r6, #1
mov r7, #0
strd r6, [fp, #-52] ; 0xffffffcc
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
arch/arm/net/bpf_jit_32.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6558bd73bbb9..3a182e618441 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -613,12 +613,11 @@ static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx)
/* Sign extended move */
static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[],
const u32 val, struct jit_ctx *ctx) {
- u32 hi = 0;
+ u64 val64 = val;
if (is64 && (val & (1<<31)))
- hi = (u32)~0;
- emit_a32_mov_i(dst_lo, val, ctx);
- emit_a32_mov_i(dst_hi, hi, ctx);
+ val64 |= 0xffffffff00000000ULL;
+ emit_a32_mov_i64(dst, val64, ctx);
}
static inline void emit_a32_add_r(const u8 dst, const u8 src,
--
2.7.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH net-next 3/4] ARM: net: bpf: improve 64-bit store implementation
2018-07-12 20:50 [PATCH net-next 0/4] Further ARM BPF jit compiler improvements Russell King - ARM Linux
2018-07-12 20:50 ` [PATCH net-next 1/4] ARM: net: bpf: improve 64-bit load immediate implementation Russell King
2018-07-12 20:50 ` [PATCH net-next 2/4] ARM: net: bpf: improve 64-bit sign-extended immediate load Russell King
@ 2018-07-12 20:50 ` Russell King
2018-07-12 20:50 ` [PATCH net-next 4/4] ARM: net: bpf: improve 64-bit ALU implementation Russell King
2018-07-13 13:30 ` [PATCH net-next 0/4] Further ARM BPF jit compiler improvements Daniel Borkmann
4 siblings, 0 replies; 6+ messages in thread
From: Russell King @ 2018-07-12 20:50 UTC (permalink / raw)
To: netdev, linux-arm-kernel; +Cc: Daniel Borkmann
Improve the 64-bit store implementation from:
ldr r6, [fp, #-8]
str r8, [r6]
ldr r6, [fp, #-8]
mov r7, #4
add r7, r6, r7
str r9, [r7]
to:
ldr r6, [fp, #-8]
str r8, [r6]
str r9, [r6, #4]
We leave the store as two separate STR instructions rather than using
STRD as the store may not be aligned, and STR can handle misalignment.
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
arch/arm/net/bpf_jit_32.c | 52 +++++++++++++++++++++++------------------------
1 file changed, 26 insertions(+), 26 deletions(-)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 3a182e618441..026612ee8151 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -975,29 +975,42 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[],
}
/* *(size *)(dst + off) = src */
-static inline void emit_str_r(const s8 dst, const s8 src,
- const s32 off, struct jit_ctx *ctx, const u8 sz){
+static inline void emit_str_r(const s8 dst, const s8 src[],
+ s32 off, struct jit_ctx *ctx, const u8 sz){
const s8 *tmp = bpf2a32[TMP_REG_1];
+ s32 off_max;
s8 rd;
rd = arm_bpf_get_reg32(dst, tmp[1], ctx);
- if (off) {
+
+ if (sz == BPF_H)
+ off_max = 0xff;
+ else
+ off_max = 0xfff;
+
+ if (off < 0 || off > off_max) {
emit_a32_mov_i(tmp[0], off, ctx);
- emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx);
+ emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx);
rd = tmp[0];
+ off = 0;
}
switch (sz) {
- case BPF_W:
- /* Store a Word */
- emit(ARM_STR_I(src, rd, 0), ctx);
+ case BPF_B:
+ /* Store a Byte */
+ emit(ARM_STRB_I(src_lo, rd, off), ctx);
break;
case BPF_H:
/* Store a HalfWord */
- emit(ARM_STRH_I(src, rd, 0), ctx);
+ emit(ARM_STRH_I(src_lo, rd, off), ctx);
break;
- case BPF_B:
- /* Store a Byte */
- emit(ARM_STRB_I(src, rd, 0), ctx);
+ case BPF_W:
+ /* Store a Word */
+ emit(ARM_STR_I(src_lo, rd, off), ctx);
+ break;
+ case BPF_DW:
+ /* Store a Double Word */
+ emit(ARM_STR_I(src_lo, rd, off), ctx);
+ emit(ARM_STR_I(src_hi, rd, off + 4), ctx);
break;
}
}
@@ -1539,16 +1552,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_DW:
/* Sign-extend immediate value into temp reg */
emit_a32_mov_se_i64(true, tmp2, imm, ctx);
- emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_W);
- emit_str_r(dst_lo, tmp2[0], off+4, ctx, BPF_W);
break;
case BPF_W:
case BPF_H:
case BPF_B:
emit_a32_mov_i(tmp2[1], imm, ctx);
- emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_SIZE(code));
break;
}
+ emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code));
break;
/* STX XADD: lock *(u32 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_W:
@@ -1560,20 +1571,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_STX | BPF_MEM | BPF_H:
case BPF_STX | BPF_MEM | BPF_B:
case BPF_STX | BPF_MEM | BPF_DW:
- {
- u8 sz = BPF_SIZE(code);
-
rs = arm_bpf_get_reg64(src, tmp2, ctx);
-
- /* Store the value */
- if (BPF_SIZE(code) == BPF_DW) {
- emit_str_r(dst_lo, rs[1], off, ctx, BPF_W);
- emit_str_r(dst_lo, rs[0], off+4, ctx, BPF_W);
- } else {
- emit_str_r(dst_lo, rs[1], off, ctx, sz);
- }
+ emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code));
break;
- }
/* PC += off if dst == src */
/* PC += off if dst > src */
/* PC += off if dst >= src */
--
2.7.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH net-next 4/4] ARM: net: bpf: improve 64-bit ALU implementation
2018-07-12 20:50 [PATCH net-next 0/4] Further ARM BPF jit compiler improvements Russell King - ARM Linux
` (2 preceding siblings ...)
2018-07-12 20:50 ` [PATCH net-next 3/4] ARM: net: bpf: improve 64-bit store implementation Russell King
@ 2018-07-12 20:50 ` Russell King
2018-07-13 13:30 ` [PATCH net-next 0/4] Further ARM BPF jit compiler improvements Daniel Borkmann
4 siblings, 0 replies; 6+ messages in thread
From: Russell King @ 2018-07-12 20:50 UTC (permalink / raw)
To: netdev, linux-arm-kernel; +Cc: Daniel Borkmann
Improve the 64-bit ALU implementation from:
movw r8, #65532
movt r8, #65535
movw r9, #65535
movt r9, #65535
ldr r7, [fp, #-44]
adds r7, r7, r8
str r7, [fp, #-44]
ldr r7, [fp, #-40]
adc r7, r7, r9
str r7, [fp, #-40]
to:
movw r8, #65532
movt r8, #65535
movw r9, #65535
movt r9, #65535
ldrd r6, [fp, #-44]
adds r6, r6, r8
adc r7, r7, r9
strd r6, [fp, #-44]
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
arch/arm/net/bpf_jit_32.c | 29 ++++++++++++++++++++++++-----
1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 026612ee8151..25b3ee85066e 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -716,11 +716,30 @@ static inline void emit_a32_alu_r(const s8 dst, const s8 src,
static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
const s8 src[], struct jit_ctx *ctx,
const u8 op) {
- emit_a32_alu_r(dst_lo, src_lo, ctx, is64, false, op);
- if (is64)
- emit_a32_alu_r(dst_hi, src_hi, ctx, is64, true, op);
- else
- emit_a32_mov_i(dst_hi, 0, ctx);
+ const s8 *tmp = bpf2a32[TMP_REG_1];
+ const s8 *tmp2 = bpf2a32[TMP_REG_2];
+ const s8 *rd;
+
+ rd = arm_bpf_get_reg64(dst, tmp, ctx);
+ if (is64) {
+ const s8 *rs;
+
+ rs = arm_bpf_get_reg64(src, tmp2, ctx);
+
+ /* ALU operation */
+ emit_alu_r(rd[1], rs[1], true, false, op, ctx);
+ emit_alu_r(rd[0], rs[0], true, true, op, ctx);
+ } else {
+ s8 rs;
+
+ rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
+
+ /* ALU operation */
+ emit_alu_r(rd[1], rs, true, false, op, ctx);
+ emit_a32_mov_i(rd[0], 0, ctx);
+ }
+
+ arm_bpf_put_reg64(dst, rd, ctx);
}
/* dst = src (4 bytes)*/
--
2.7.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH net-next 0/4] Further ARM BPF jit compiler improvements
2018-07-12 20:50 [PATCH net-next 0/4] Further ARM BPF jit compiler improvements Russell King - ARM Linux
` (3 preceding siblings ...)
2018-07-12 20:50 ` [PATCH net-next 4/4] ARM: net: bpf: improve 64-bit ALU implementation Russell King
@ 2018-07-13 13:30 ` Daniel Borkmann
4 siblings, 0 replies; 6+ messages in thread
From: Daniel Borkmann @ 2018-07-13 13:30 UTC (permalink / raw)
To: Russell King - ARM Linux, netdev, linux-arm-kernel
On 07/12/2018 10:50 PM, Russell King - ARM Linux wrote:
> Four further jit compiler improves for 32-bit ARM.
>
> arch/arm/net/bpf_jit_32.c | 120 ++++++++++++++++++++++++++++------------------
> 1 file changed, 73 insertions(+), 47 deletions(-)
Applied to bpf-next, thanks Russell! (Fyi, pull-req with the two sets included will
go out to David Miller for net-next later today.)
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2018-07-13 13:45 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-12 20:50 [PATCH net-next 0/4] Further ARM BPF jit compiler improvements Russell King - ARM Linux
2018-07-12 20:50 ` [PATCH net-next 1/4] ARM: net: bpf: improve 64-bit load immediate implementation Russell King
2018-07-12 20:50 ` [PATCH net-next 2/4] ARM: net: bpf: improve 64-bit sign-extended immediate load Russell King
2018-07-12 20:50 ` [PATCH net-next 3/4] ARM: net: bpf: improve 64-bit store implementation Russell King
2018-07-12 20:50 ` [PATCH net-next 4/4] ARM: net: bpf: improve 64-bit ALU implementation Russell King
2018-07-13 13:30 ` [PATCH net-next 0/4] Further ARM BPF jit compiler improvements Daniel Borkmann
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).