* [PATCH 1/3] bpf powerpc: introduce accessors for using the tmp local stack space
From: Naveen N. Rao @ 2016-09-23 20:35 UTC
To: linux-kernel, linuxppc-dev, netdev, Michael Ellerman
Cc: Alexei Starovoitov, Daniel Borkmann, David S. Miller,
Ananth N Mavinakayanahalli
While at it, ensure that the location of the local save area is
consistent whether or not we set up our own stack frame. This property is
utilised in the next patch that adds support for tail calls.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/net/bpf_jit64.h | 16 +++++---
arch/powerpc/net/bpf_jit_comp64.c | 79 ++++++++++++++++++++++-----------------
2 files changed, 55 insertions(+), 40 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 5046d6f..a1645d7 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -16,22 +16,25 @@
/*
* Stack layout:
+ * Ensure the top half (up to local_tmp_var) stays consistent
+ * with our redzone usage.
*
* [ prev sp ] <-------------
* [ nv gpr save area ] 8*8 |
+ * [ tail_call_cnt ] 8 |
+ * [ local_tmp_var ] 8 |
* fp (r31) --> [ ebpf stack space ] 512 |
- * [ local/tmp var space ] 16 |
* [ frame header ] 32/112 |
* sp (r1) ---> [ stack pointer ] --------------
*/
-/* for bpf JIT code internal usage */
-#define BPF_PPC_STACK_LOCALS 16
/* for gpr non volatile registers BPF_REG_6 to 10, plus skb cache registers */
#define BPF_PPC_STACK_SAVE (8*8)
+/* for bpf JIT code internal usage */
+#define BPF_PPC_STACK_LOCALS 16
/* Ensure this is quadword aligned */
-#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS + \
- MAX_BPF_STACK + BPF_PPC_STACK_SAVE)
+#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + MAX_BPF_STACK + \
+ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
#ifndef __ASSEMBLY__
@@ -65,6 +68,9 @@ static const int b2p[] = {
[TMP_REG_2] = 10
};
+/* PPC NVR range -- update this if we ever use NVRs below r24 */
+#define BPF_PPC_NVR_MIN 24
+
/* Assembly helpers */
#define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \
u64 func##_negative_offset(u64 r3, u64 r4); \
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 6073b78..5f8c91f 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -58,6 +58,35 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
}
+/*
+ * When not setting up our own stackframe, the redzone usage is:
+ *
+ * [ prev sp ] <-------------
+ * [ ... ] |
+ * sp (r1) ---> [ stack pointer ] --------------
+ * [ nv gpr save area ] 8*8
+ * [ tail_call_cnt ] 8
+ * [ local_tmp_var ] 8
+ * [ unused red zone ] 208 bytes protected
+ */
+static int bpf_jit_stack_local(struct codegen_context *ctx)
+{
+ if (bpf_has_stack_frame(ctx))
+ return STACK_FRAME_MIN_SIZE + MAX_BPF_STACK;
+ else
+ return -(BPF_PPC_STACK_SAVE + 16);
+}
+
+static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+{
+ if (reg >= BPF_PPC_NVR_MIN && reg < 32)
+ return (bpf_has_stack_frame(ctx) ? BPF_PPC_STACKFRAME : 0)
+ - (8 * (32 - reg));
+
+ pr_err("BPF JIT is asking about unknown registers");
+ BUG();
+}
+
static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
{
/*
@@ -100,9 +129,8 @@ static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64
static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
- bool new_stack_frame = bpf_has_stack_frame(ctx);
- if (new_stack_frame) {
+ if (bpf_has_stack_frame(ctx)) {
/*
* We need a stack frame, but we don't necessarily need to
* save/restore LR unless we call other functions
@@ -122,9 +150,7 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
*/
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
if (bpf_is_seen_register(ctx, i))
- PPC_BPF_STL(b2p[i], 1,
- (new_stack_frame ? BPF_PPC_STACKFRAME : 0) -
- (8 * (32 - b2p[i])));
+ PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
/*
* Save additional non-volatile regs if we cache skb
@@ -132,22 +158,21 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
*/
if (ctx->seen & SEEN_SKB) {
PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
- BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG])));
+ bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
- BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG])));
+ bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
bpf_jit_emit_skb_loads(image, ctx);
}
/* Setup frame pointer to point to the bpf stack area */
if (bpf_is_seen_register(ctx, BPF_REG_FP))
PPC_ADDI(b2p[BPF_REG_FP], 1,
- BPF_PPC_STACKFRAME - BPF_PPC_STACK_SAVE);
+ STACK_FRAME_MIN_SIZE + MAX_BPF_STACK);
}
static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
int i;
- bool new_stack_frame = bpf_has_stack_frame(ctx);
/* Move result to r3 */
PPC_MR(3, b2p[BPF_REG_0]);
@@ -155,20 +180,18 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
/* Restore NVRs */
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
if (bpf_is_seen_register(ctx, i))
- PPC_BPF_LL(b2p[i], 1,
- (new_stack_frame ? BPF_PPC_STACKFRAME : 0) -
- (8 * (32 - b2p[i])));
+ PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
/* Restore non-volatile registers used for skb cache */
if (ctx->seen & SEEN_SKB) {
PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
- BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG])));
+ bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
- BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG])));
+ bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
}
/* Tear down our stack frame */
- if (new_stack_frame) {
+ if (bpf_has_stack_frame(ctx)) {
PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
if (ctx->seen & SEEN_FUNC) {
PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
@@ -200,7 +223,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
u64 imm64;
u8 *func;
u32 true_cond;
- int stack_local_off;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -219,9 +241,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
* optimization but everything else should work without
* any issues.
*/
- if (dst_reg >= 24 && dst_reg <= 31)
+ if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
bpf_set_seen_register(ctx, insn[i].dst_reg);
- if (src_reg >= 24 && src_reg <= 31)
+ if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
bpf_set_seen_register(ctx, insn[i].src_reg);
switch (code) {
@@ -490,25 +512,12 @@ bpf_alu32_trunc:
* Way easier and faster(?) to store the value
* into stack and then use ldbrx
*
- * First, determine where in stack we can store
- * this:
- * - if we have allotted a stack frame, then we
- * will utilize the area set aside by
- * BPF_PPC_STACK_LOCALS
- * - else, we use the area beneath the NV GPR
- * save area
- *
* ctx->seen will be reliable in pass2, but
* the instructions generated will remain the
* same across all passes
*/
- if (bpf_has_stack_frame(ctx))
- stack_local_off = STACK_FRAME_MIN_SIZE;
- else
- stack_local_off = -(BPF_PPC_STACK_SAVE + 8);
-
- PPC_STD(dst_reg, 1, stack_local_off);
- PPC_ADDI(b2p[TMP_REG_1], 1, stack_local_off);
+ PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx));
+ PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
break;
}
@@ -668,7 +677,7 @@ emit_clear:
/* Save skb pointer if we need to re-cache skb data */
if (bpf_helper_changes_skb_data(func))
- PPC_BPF_STL(3, 1, STACK_FRAME_MIN_SIZE);
+ PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -678,7 +687,7 @@ emit_clear:
/* refresh skb cache */
if (bpf_helper_changes_skb_data(func)) {
/* reload skb pointer to r3 */
- PPC_BPF_LL(3, 1, STACK_FRAME_MIN_SIZE);
+ PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_skb_loads(image, ctx);
}
break;
--
2.9.3
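As a side note for readers, the offset selection that the new
bpf_jit_stack_local() accessor centralizes can be rendered as a few lines of
standalone C. This is only a sketch: the constants below are stand-ins using
the ELFv2 values (the real values come from the kernel headers, and the frame
header is 112 bytes on ELFv1), and the function names are not the kernel's.

#include <stdbool.h>
#include <stdio.h>

#define STACK_FRAME_MIN_SIZE	32	/* stand-in: ELFv2 frame header */
#define MAX_BPF_STACK		512	/* eBPF stack space */
#define BPF_PPC_STACK_SAVE	(8 * 8)	/* NV GPR save area */

/*
 * Mirrors bpf_jit_stack_local(): with a stack frame, the 16-byte local
 * area sits above the frame header and the eBPF stack space; without
 * one, it sits in the red zone just below the NV GPR save area. Either
 * way the slot lands at the same spot relative to the rest of the
 * program's stack data, which is the property the next patch relies on.
 */
static int stack_local_off(bool has_stack_frame)
{
	if (has_stack_frame)
		return STACK_FRAME_MIN_SIZE + MAX_BPF_STACK;
	return -(BPF_PPC_STACK_SAVE + 16);
}

int main(void)
{
	printf("with frame:    r1%+d\n", stack_local_off(true));  /* r1+544 */
	printf("red zone only: r1%+d\n", stack_local_off(false)); /* r1-80 */
	return 0;
}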
* [PATCH 2/3] bpf powerpc: implement support for tail calls
From: Naveen N. Rao @ 2016-09-23 20:35 UTC
To: linux-kernel, linuxppc-dev, netdev, Michael Ellerman
Cc: Alexei Starovoitov, Daniel Borkmann, David S. Miller,
Ananth N Mavinakayanahalli
Tail calls allow JIT'ed eBPF programs to call into other JIT'ed eBPF
programs. This can be achieved either by:
(1) retaining the stack set up by the first eBPF program and having all
subsequent eBPF programs re-use it, or
(2) unwinding/tearing down the stack and having each eBPF program
deal with its own stack as it sees fit.
To ensure that this does not create loops, there is a limit to how many
tail calls can be done (currently 32). This requires the JIT'ed code to
maintain a count of the number of tail calls done so far.
Approach (1) is simple, but requires every eBPF program to have (almost)
the same prologue/epilogue, regardless of whether they need it. This is
inefficient for small eBPF programs, which may sometimes not need a
prologue at all. As such, to minimize the impact of the tail call
implementation, we use approach (2) here, which needs each eBPF program
in the chain to use its own prologue/epilogue. This is not ideal when
many tail calls are involved and all the eBPF programs in the chain
have similar prologues/epilogues. However, the impact is restricted to
programs that do tail calls; individual eBPF programs are not affected.
We maintain the tail call count in a fixed location on the stack, and
updated tail call count values are passed in through this slot. The very
first eBPF program in a chain initializes it to 0 (the first 2
instructions). Subsequent tail calls skip the first two JIT'ed
instructions so the count is preserved across the chain. For programs
that don't do tail calls themselves, the first two instructions are NOPs.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/ppc-opcode.h | 2 +
arch/powerpc/net/bpf_jit.h | 2 +
arch/powerpc/net/bpf_jit64.h | 1 +
arch/powerpc/net/bpf_jit_comp64.c | 149 +++++++++++++++++++++++++++-------
4 files changed, 126 insertions(+), 28 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 127ebf5..54ff8ce 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -236,6 +236,7 @@
#define PPC_INST_STWU 0x94000000
#define PPC_INST_MFLR 0x7c0802a6
#define PPC_INST_MTLR 0x7c0803a6
+#define PPC_INST_MTCTR 0x7c0903a6
#define PPC_INST_CMPWI 0x2c000000
#define PPC_INST_CMPDI 0x2c200000
#define PPC_INST_CMPW 0x7c000000
@@ -250,6 +251,7 @@
#define PPC_INST_SUB 0x7c000050
#define PPC_INST_BLR 0x4e800020
#define PPC_INST_BLRL 0x4e800021
+#define PPC_INST_BCTR 0x4e800420
#define PPC_INST_MULLD 0x7c0001d2
#define PPC_INST_MULLW 0x7c0001d6
#define PPC_INST_MULHWU 0x7c000016
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d5301b6..89f7007 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -40,6 +40,8 @@
#define PPC_BLR() EMIT(PPC_INST_BLR)
#define PPC_BLRL() EMIT(PPC_INST_BLRL)
#define PPC_MTLR(r) EMIT(PPC_INST_MTLR | ___PPC_RT(r))
+#define PPC_BCTR() EMIT(PPC_INST_BCTR)
+#define PPC_MTCTR(r) EMIT(PPC_INST_MTCTR | ___PPC_RT(r))
#define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | ___PPC_RT(d) | \
___PPC_RA(a) | IMM_L(i))
#define PPC_MR(d, a) PPC_OR(d, a, a)
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index a1645d7..038e00b 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -88,6 +88,7 @@ DECLARE_LOAD_FUNC(sk_load_byte);
#define SEEN_FUNC 0x1000 /* might call external helpers */
#define SEEN_STACK 0x2000 /* uses BPF stack */
#define SEEN_SKB 0x4000 /* uses sk_buff */
+#define SEEN_TAILCALL 0x8000 /* uses tail calls */
struct codegen_context {
/*
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 5f8c91f..3ec29d6 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -17,6 +17,7 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
+#include <linux/bpf.h>
#include "bpf_jit64.h"
@@ -77,6 +78,11 @@ static int bpf_jit_stack_local(struct codegen_context *ctx)
return -(BPF_PPC_STACK_SAVE + 16);
}
+static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
+{
+ return bpf_jit_stack_local(ctx) + 8;
+}
+
static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
if (reg >= BPF_PPC_NVR_MIN && reg < 32)
@@ -102,33 +108,25 @@ static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
}
-static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
+static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
-#ifdef PPC64_ELF_ABI_v1
- /* func points to the function descriptor */
- PPC_LI64(b2p[TMP_REG_2], func);
- /* Load actual entry point from function descriptor */
- PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
- /* ... and move it to LR */
- PPC_MTLR(b2p[TMP_REG_1]);
+ int i;
+
/*
- * Load TOC from function descriptor at offset 8.
- * We can clobber r2 since we get called through a
- * function pointer (so caller will save/restore r2)
- * and since we don't use a TOC ourself.
+ * Initialize tail_call_cnt if we do tail calls.
+ * Otherwise, put in NOPs so that it can be skipped when we are
+ * invoked through a tail call.
*/
- PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
- /* We can clobber r12 */
- PPC_FUNC_ADDR(12, func);
- PPC_MTLR(12);
-#endif
- PPC_BLRL();
-}
+ if (ctx->seen & SEEN_TAILCALL) {
+ PPC_LI(b2p[TMP_REG_1], 0);
+ /* this goes in the redzone */
+ PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
+ } else {
+ PPC_NOP();
+ PPC_NOP();
+ }
-static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
-{
- int i;
+#define BPF_TAILCALL_PROLOGUE_SIZE 8
if (bpf_has_stack_frame(ctx)) {
/*
@@ -170,13 +168,10 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
STACK_FRAME_MIN_SIZE + MAX_BPF_STACK);
}
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
int i;
- /* Move result to r3 */
- PPC_MR(3, b2p[BPF_REG_0]);
-
/* Restore NVRs */
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
if (bpf_is_seen_register(ctx, i))
@@ -198,10 +193,105 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
PPC_MTLR(0);
}
}
+}
+
+static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ /* Move result to r3 */
+ PPC_MR(3, b2p[BPF_REG_0]);
PPC_BLR();
}
+static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
+{
+#ifdef PPC64_ELF_ABI_v1
+ /* func points to the function descriptor */
+ PPC_LI64(b2p[TMP_REG_2], func);
+ /* Load actual entry point from function descriptor */
+ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
+ /* ... and move it to LR */
+ PPC_MTLR(b2p[TMP_REG_1]);
+ /*
+ * Load TOC from function descriptor at offset 8.
+ * We can clobber r2 since we get called through a
+ * function pointer (so caller will save/restore r2)
+ * and since we don't use a TOC ourself.
+ */
+ PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
+#else
+ /* We can clobber r12 */
+ PPC_FUNC_ADDR(12, func);
+ PPC_MTLR(12);
+#endif
+ PPC_BLRL();
+}
+
+static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+ /*
+ * By now, the eBPF program has already set up parameters in r3, r4 and r5
+ * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+ * r4/BPF_REG_2 - pointer to bpf_array
+ * r5/BPF_REG_3 - index in bpf_array
+ */
+ int b2p_bpf_array = b2p[BPF_REG_2];
+ int b2p_index = b2p[BPF_REG_3];
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ */
+ PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
+ PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
+ PPC_BCC(COND_GE, out);
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
+ PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
+ PPC_BCC(COND_GT, out);
+
+ /*
+ * tail_call_cnt++;
+ */
+ PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
+ PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
+
+ /* prog = array->ptrs[index]; */
+ PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
+ PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
+ PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
+
+ /*
+ * if (prog == NULL)
+ * goto out;
+ */
+ PPC_CMPLDI(b2p[TMP_REG_1], 0);
+ PPC_BCC(COND_EQ, out);
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
+#ifdef PPC64_ELF_ABI_v1
+ /* skip past the function descriptor */
+ PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
+ FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
+#else
+ PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
+#endif
+ PPC_MTCTR(b2p[TMP_REG_1]);
+
+ /* tear down stack, restore NVRs, ... */
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ PPC_BCTR();
+ /* out: */
+}
+
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx,
@@ -846,9 +936,12 @@ common_load:
break;
/*
- * TODO: Tail call
+ * Tail call
*/
case BPF_JMP | BPF_CALL | BPF_X:
+ ctx->seen |= SEEN_TAILCALL;
+ bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+ break;
default:
/*
--
2.9.3
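For reference, the guard conditions that bpf_jit_emit_tail_call() encodes can
be sketched as standalone C. This is a simplified stand-in, not the kernel's
types: struct bpf_array really holds struct bpf_prog pointers whose bpf_func
field is loaded, whereas the sketch stores entry points directly, and the
helper name is hypothetical.

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#define MAX_TAIL_CALL_CNT 32	/* limit on chained tail calls */

struct prog_array_sketch {	/* stand-in for struct bpf_array */
	uint32_t max_entries;
	void *entry[];		/* stand-in for ptrs[]->bpf_func */
};

/*
 * Returns the address to branch to, or NULL to fall through to the
 * "out:" label: bounds-check the index, cap the chain depth, bump the
 * on-stack counter, skip empty slots, and land past the target's
 * counter-init prologue (BPF_TAILCALL_PROLOGUE_SIZE) so the count set
 * up by the first program in the chain is preserved.
 */
static void *tail_call_target(struct prog_array_sketch *array, uint32_t index,
			      uint64_t *tail_call_cnt, size_t prologue_size)
{
	if (index >= array->max_entries)
		return NULL;
	if (*tail_call_cnt > MAX_TAIL_CALL_CNT)
		return NULL;
	(*tail_call_cnt)++;	/* lives at bpf_jit_stack_tailcallcnt() */
	if (array->entry[index] == NULL)
		return NULL;
	return (char *)array->entry[index] + prologue_size;
}

int main(void)
{
	uint64_t cnt = 0;
	struct prog_array_sketch *a = calloc(1, sizeof(*a) + 4 * sizeof(void *));

	a->max_entries = 4;
	/* all slots empty: falls through, like the emitted "goto out" */
	return tail_call_target(a, 1, &cnt, 8) == NULL ? 0 : 1;
}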
* [PATCH 3/3] bpf powerpc: add support for bpf constant blinding
From: Naveen N. Rao @ 2016-09-23 20:35 UTC
To: linux-kernel, linuxppc-dev, netdev, Michael Ellerman
Cc: Alexei Starovoitov, Daniel Borkmann, David S. Miller,
Ananth N Mavinakayanahalli
In line with similar support for other architectures by Daniel Borkmann.
'MOD Default X' from test_bpf without constant blinding:
84 bytes emitted from JIT compiler (pass:3, flen:7)
d0000000058a4688 + <x>:
0: nop
4: nop
8: std r27,-40(r1)
c: std r28,-32(r1)
10: xor r8,r8,r8
14: xor r28,r28,r28
18: mr r27,r3
1c: li r8,66
20: cmpwi r28,0
24: bne 0x0000000000000030
28: li r8,0
2c: b 0x0000000000000044
30: divwu r9,r8,r28
34: mullw r9,r28,r9
38: subf r8,r9,r8
3c: rotlwi r8,r8,0
40: li r8,66
44: ld r27,-40(r1)
48: ld r28,-32(r1)
4c: mr r3,r8
50: blr
... and with constant blinding:
140 bytes emitted from JIT compiler (pass:3, flen:11)
d00000000bd6ab24 + <x>:
0: nop
4: nop
8: std r27,-40(r1)
c: std r28,-32(r1)
10: xor r8,r8,r8
14: xor r28,r28,r28
18: mr r27,r3
1c: lis r2,-22834
20: ori r2,r2,36083
24: rotlwi r2,r2,0
28: xori r2,r2,36017
2c: xoris r2,r2,42702
30: rotlwi r2,r2,0
34: mr r8,r2
38: rotlwi r8,r8,0
3c: cmpwi r28,0
40: bne 0x000000000000004c
44: li r8,0
48: b 0x000000000000007c
4c: divwu r9,r8,r28
50: mullw r9,r28,r9
54: subf r8,r9,r8
58: rotlwi r8,r8,0
5c: lis r2,-17137
60: ori r2,r2,39065
64: rotlwi r2,r2,0
68: xori r2,r2,39131
6c: xoris r2,r2,48399
70: rotlwi r2,r2,0
74: mr r8,r2
78: rotlwi r8,r8,0
7c: ld r27,-40(r1)
80: ld r28,-32(r1)
84: mr r3,r8
88: blr
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
arch/powerpc/net/bpf_jit64.h | 9 +++++----
arch/powerpc/net/bpf_jit_comp64.c | 36 +++++++++++++++++++++++++++++-------
2 files changed, 34 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 038e00b..62fa758 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -39,10 +39,10 @@
#ifndef __ASSEMBLY__
/* BPF register usage */
-#define SKB_HLEN_REG (MAX_BPF_REG + 0)
-#define SKB_DATA_REG (MAX_BPF_REG + 1)
-#define TMP_REG_1 (MAX_BPF_REG + 2)
-#define TMP_REG_2 (MAX_BPF_REG + 3)
+#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 0)
+#define SKB_DATA_REG (MAX_BPF_JIT_REG + 1)
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 2)
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 3)
/* BPF to ppc register mappings */
static const int b2p[] = {
@@ -62,6 +62,7 @@ static const int b2p[] = {
/* frame pointer aka BPF_REG_10 */
[BPF_REG_FP] = 31,
/* eBPF jit internal registers */
+ [BPF_REG_AX] = 2,
[SKB_HLEN_REG] = 25,
[SKB_DATA_REG] = 26,
[TMP_REG_1] = 9,
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 3ec29d6..0fe98a5 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -974,21 +974,37 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
int pass;
int flen;
struct bpf_binary_header *bpf_hdr;
+ struct bpf_prog *org_fp = fp;
+ struct bpf_prog *tmp_fp;
+ bool bpf_blinded = false;
if (!bpf_jit_enable)
- return fp;
+ return org_fp;
+
+ tmp_fp = bpf_jit_blind_constants(org_fp);
+ if (IS_ERR(tmp_fp))
+ return org_fp;
+
+ if (tmp_fp != org_fp) {
+ bpf_blinded = true;
+ fp = tmp_fp;
+ }
flen = fp->len;
addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
- if (addrs == NULL)
- return fp;
+ if (addrs == NULL) {
+ fp = org_fp;
+ goto out;
+ }
+
+ memset(&cgctx, 0, sizeof(struct codegen_context));
- cgctx.idx = 0;
- cgctx.seen = 0;
/* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
/* We hit something illegal or unsupported. */
+ fp = org_fp;
goto out;
+ }
/*
* Pretend to build prologue, given the features we've seen. This will
@@ -1003,8 +1019,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
bpf_jit_fill_ill_insns);
- if (!bpf_hdr)
+ if (!bpf_hdr) {
+ fp = org_fp;
goto out;
+ }
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
@@ -1041,6 +1059,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
out:
kfree(addrs);
+
+ if (bpf_blinded)
+ bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
+
return fp;
}
--
2.9.3
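For context, the blinding idea itself is simple to sketch: rather than
embedding an attacker-chosen immediate directly in the JIT image,
bpf_jit_blind_constants() arranges for IMM ^ rnd to be embedded and for IMM to
be re-derived at run time in the scratch register BPF_REG_AX (mapped to r2
above) -- visible as the lis/ori pairs followed by xori/xoris in the blinded
disassembly. A minimal sketch, with rand() as a stand-in for the kernel's
randomness source:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
	uint32_t imm = 66;			/* the constant from 'li r8,66' */
	uint32_t rnd = (uint32_t)rand();	/* stand-in randomness */

	uint32_t blinded = imm ^ rnd;		/* what lands in the image */
	uint32_t at_runtime = blinded ^ rnd;	/* recovered via xori/xoris */

	assert(at_runtime == imm);	/* same value, no literal imm in image */
	return 0;
}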
* Re: [PATCH 3/3] bpf powerpc: add support for bpf constant blinding
From: Daniel Borkmann @ 2016-09-23 21:40 UTC
To: Naveen N. Rao, linux-kernel, linuxppc-dev, netdev,
Michael Ellerman
Cc: Alexei Starovoitov, David S. Miller, Ananth N Mavinakayanahalli
On 09/23/2016 10:35 PM, Naveen N. Rao wrote:
> In line with similar support for other architectures by Daniel Borkmann.
>
> 'MOD Default X' from test_bpf without constant blinding:
> [...]
>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
* Re: [PATCH 2/3] bpf powerpc: implement support for tail calls
From: Daniel Borkmann @ 2016-09-23 22:33 UTC
To: Naveen N. Rao, linux-kernel, linuxppc-dev, netdev,
Michael Ellerman
Cc: Alexei Starovoitov, David S. Miller, Ananth N Mavinakayanahalli
On 09/23/2016 10:35 PM, Naveen N. Rao wrote:
> Tail calls allow JIT'ed eBPF programs to call into other JIT'ed eBPF
> programs. This can be achieved either by: [...]
>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Thanks for adding support, Naveen, that's really great! I think 2) seems
fine as well in this context as prologue size can vary quite a bit here,
and depending on program types likelihood of tail call usage as well (but
I wouldn't expect deep nesting). Thanks a lot!
* Re: [PATCH 2/3] bpf powerpc: implement support for tail calls
From: Alexei Starovoitov @ 2016-09-24 7:30 UTC
To: Daniel Borkmann
Cc: Naveen N. Rao, linux-kernel, linuxppc-dev, netdev,
Michael Ellerman, Alexei Starovoitov, David S. Miller,
Ananth N Mavinakayanahalli
On Sat, Sep 24, 2016 at 12:33:54AM +0200, Daniel Borkmann wrote:
> On 09/23/2016 10:35 PM, Naveen N. Rao wrote:
> >Tail calls allow JIT'ed eBPF programs to call into other JIT'ed eBPF
> >programs. [...]
>
> Thanks for adding support, Naveen, that's really great! I think 2) seems
> fine as well in this context as prologue size can vary quite a bit here,
> and depending on program types likelihood of tail call usage as well (but
> I wouldn't expect deep nesting). Thanks a lot!
Great stuff. In these circumstances approach 2 makes sense to me as well.
* Re: [PATCH 2/3] bpf powerpc: implement support for tail calls
From: Naveen N. Rao @ 2016-09-26 8:56 UTC
To: Alexei Starovoitov
Cc: Daniel Borkmann, Ananth N Mavinakayanahalli, Alexei Starovoitov,
netdev, linux-kernel, linuxppc-dev, David S. Miller
On 2016/09/24 03:30AM, Alexei Starovoitov wrote:
> On Sat, Sep 24, 2016 at 12:33:54AM +0200, Daniel Borkmann wrote:
> > On 09/23/2016 10:35 PM, Naveen N. Rao wrote:
> > >Tail calls allow JIT'ed eBPF programs to call into other JIT'ed eBPF
> > >programs. [...]
> >
> > Thanks for adding support, Naveen, that's really great! I think 2) seems
> > fine as well in this context as prologue size can vary quite a bit here,
> > and depending on program types likelihood of tail call usage as well (but
> > I wouldn't expect deep nesting). Thanks a lot!
>
> Great stuff. In these circumstances approach 2 makes sense to me as well.
Alexei, Daniel,
Thanks for the quick review!
- Naveen
* Re: [PATCH 2/3] bpf powerpc: implement support for tail calls
From: Daniel Borkmann @ 2016-09-26 9:00 UTC
To: Naveen N. Rao, Alexei Starovoitov
Cc: Ananth N Mavinakayanahalli, Alexei Starovoitov, netdev,
linux-kernel, linuxppc-dev, David S. Miller
On 09/26/2016 10:56 AM, Naveen N. Rao wrote:
> On 2016/09/24 03:30AM, Alexei Starovoitov wrote:
>> On Sat, Sep 24, 2016 at 12:33:54AM +0200, Daniel Borkmann wrote:
>>> On 09/23/2016 10:35 PM, Naveen N. Rao wrote:
>>>> Tail calls allow JIT'ed eBPF programs to call into other JIT'ed eBPF
>>>> programs. [...]
>>
>> Great stuff. In these circumstances approach 2 makes sense to me as well.
>
> Alexei, Daniel,
> Thanks for the quick review!
The patches would go via Michael's tree (same way as with the JIT itself
in the past), right?
* Re: [PATCH 2/3] bpf powerpc: implement support for tail calls
From: Naveen N. Rao @ 2016-09-26 9:09 UTC
To: Daniel Borkmann
Cc: Alexei Starovoitov, Ananth N Mavinakayanahalli,
Alexei Starovoitov, netdev, linux-kernel, linuxppc-dev,
David S. Miller
On 2016/09/26 11:00AM, Daniel Borkmann wrote:
> On 09/26/2016 10:56 AM, Naveen N. Rao wrote:
> > On 2016/09/24 03:30AM, Alexei Starovoitov wrote:
> > > On Sat, Sep 24, 2016 at 12:33:54AM +0200, Daniel Borkmann wrote:
> > > > On 09/23/2016 10:35 PM, Naveen N. Rao wrote:
> > > > > Tail calls allow JIT'ed eBPF programs to call into other JIT'ed eBPF
> > > > > programs. [...]
> > >
> > > Great stuff. In these circumstances approach 2 makes sense to me as well.
> >
> > Alexei, Daniel,
> > Thanks for the quick review!
>
> The patches would go via Michael's tree (same way as with the JIT itself
> in the past), right?
Yes, this set is contained within arch/powerpc, so Michael can take this
through his tree.
The other set with updates to samples/bpf can probably go through
David's tree.
- Naveen
* Re: [1/3] bpf powerpc: introduce accessors for using the tmp local stack space
From: Michael Ellerman @ 2016-10-05 2:36 UTC
To: Naveen N. Rao, linux-kernel, linuxppc-dev, netdev
Cc: Ananth N Mavinakayanahalli, David S. Miller, Daniel Borkmann,
Alexei Starovoitov
On Fri, 2016-09-23 at 20:35:00 UTC, "Naveen N. Rao" wrote:
> While at it, ensure that the location of the local save area is
> consistent whether or not we set up our own stack frame. This property is
> utilised in the next patch that adds support for tail calls.
>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Series applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/7b847f523fe07b4ad73a01cec49a4d
cheers
Thread overview: 10+ messages
2016-09-23 20:35 [PATCH 1/3] bpf powerpc: introduce accessors for using the tmp local stack space Naveen N. Rao
2016-09-23 20:35 ` [PATCH 2/3] bpf powerpc: implement support for tail calls Naveen N. Rao
2016-09-23 22:33 ` Daniel Borkmann
2016-09-24 7:30 ` Alexei Starovoitov
2016-09-26 8:56 ` Naveen N. Rao
2016-09-26 9:00 ` Daniel Borkmann
2016-09-26 9:09 ` Naveen N. Rao
2016-09-23 20:35 ` [PATCH 3/3] bpf powerpc: add support for bpf constant blinding Naveen N. Rao
2016-09-23 21:40 ` Daniel Borkmann
2016-10-05 2:36 ` [1/3] bpf powerpc: introduce accessors for using the tmp local stack space Michael Ellerman