netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
@ 2012-03-28 19:15 Jan Seiffert
  2012-03-28 20:05 ` Eric Dumazet
  2012-03-28 20:58 ` David Miller
  0 siblings, 2 replies; 14+ messages in thread
From: Jan Seiffert @ 2012-03-28 19:15 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, David S. Miller, Matt Evans, Eric Dumazet

[-- Attachment #1: Type: text/plain, Size: 2386 bytes --]

Consider the following test program:

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pcap-bpf.h>

#define die(x) do {perror(x); return 1;} while (0)
struct bpf_insn udp_filter[] = {
	/*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax	net[0] */
	/*   1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0),             /* ldb	[x+0] */
	/*   2 */ BPF_STMT(BPF_RET|BPF_A, 0),                    /* ret	a */
};

int main(int argc, char *argv[])
{
	char buf[512];
	struct sockaddr_in addr;
	struct bpf_program prg;
	socklen_t addr_s;
	ssize_t res;
	int fd;

	addr.sin_family = AF_INET;
	addr.sin_port = htons(5000);
	addr.sin_addr.s_addr = 0;
	addr_s = sizeof(addr);
	prg.bf_len = sizeof(udp_filter)/sizeof(udp_filter[0]);
	prg.bf_insns = udp_filter;
	if(-1 == (fd = socket(AF_INET, SOCK_DGRAM, 0)))
		die("socket");
	if(-1 == bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
		die("bind");
	if(-1 == setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prg, sizeof(prg)))
		die("setsockopt");
	res = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&addr, &addr_s);
	if(res != -1)
		printf("packet received: %zi bytes\n", res);
	else
		die("recvfrom");
	return 0;
}

when used with the bpf jit disabled works:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1: packet received: 6 bytes

When the bpf jit gets enabled (echo 100 >
/proc/sys/net/core/bpf_jit_enable) the same program stops working:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1:

The reason is that both jits (x86 and powerpc) do not handle negative
memory references like SKF_NET_OFF or SKF_LL_OFF, only the simple
ancillary data references are supported (by mapping to special
instructions).
In the case of an absolute reference, the jit aborts the translation
if a negative reference is seen, also a negative k on the indirect
load aborts the translation, but if X is negative to begin with, only
the error handler is reached at runtime which drops the whole packet.

I propose the following patch to fix this situation.
Lightly tested on x86, but the powerpc asm part is prop. wrong.

Signed-of-by: Jan Seiffert <kaffeemonster@googlemail.com>

-- 
I know this is not a 100% submission, please refrain from shooting me,
i'm not really set up for professional kernel development, i just want
this fixed.

Greetings
Jan

[-- Attachment #2: bpf_neg.diff --]
[-- Type: application/octet-stream, Size: 10559 bytes --]

diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index ff4506e..87dc6e4 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -31,14 +31,11 @@
  * then branch directly to slow_path_XXX if required.  (In fact, could
  * load a spare GPR with the address of slow_path_generic and pass size
  * as an argument, making the call site a mtlr, li and bllr.)
- *
- * Technically, the "is addr < 0" check is unnecessary & slowing down
- * the ABS path, as it's statically checked on generation.
  */
 	.globl	sk_load_word
 sk_load_word:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_neg_word
 	/* Are we accessing past headlen? */
 	subi	r_scratch1, r_HL, 4
 	cmpd	r_scratch1, r_addr
@@ -51,7 +48,7 @@ sk_load_word:
 	.globl	sk_load_half
 sk_load_half:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_neg_half
 	subi	r_scratch1, r_HL, 2
 	cmpd	r_scratch1, r_addr
 	blt	bpf_slow_path_half
@@ -61,7 +58,7 @@ sk_load_half:
 	.globl	sk_load_byte
 sk_load_byte:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_neg_byte
 	cmpd	r_HL, r_addr
 	ble	bpf_slow_path_byte
 	lbzx	r_A, r_D, r_addr
@@ -69,16 +66,22 @@ sk_load_byte:
 
 /*
  * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
- * r_addr is the offset value, already known positive
+ * r_addr is the offset value
  */
 	.globl sk_load_byte_msh
 sk_load_byte_msh:
+	cmpdi	r_addr, 0
+	blt	bpf_slow_path_neg_byte_msh
 	cmpd	r_HL, r_addr
 	ble	bpf_slow_path_byte_msh
 	lbzx	r_X, r_D, r_addr
 	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
 	blr
 
+bpf_error_slow:
+	/* fabricate a cr0 = lt */
+	li	r_scratch1, -1
+	cmpdi	r_scratch1, 0
 bpf_error:
 	/* Entered with cr0 = lt */
 	li	r3, 0
@@ -136,3 +139,57 @@ bpf_slow_path_byte_msh:
 	lbz	r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
 	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
 	blr
+
+/* Call out to bpf_internal_load_pointer_neg_helper:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define bpf_slow_path_neg_common(SIZE)				\
+	lis     r_scratch1,-32; /* SKF_LL_OFF */		\
+	cmpd	r_addr, r_scratch1; /* addr < SKF_* */		\
+	blt	bpf_error; /* cr0 = LT */			\
+	mflr	r0;						\
+	std	r0, 16(r1);					\
+	/* R3 goes in parameter space of caller's frame */	\
+	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
+	/* R3 = r_skb, as passed */				\
+	mr	r4, r_addr;					\
+	li	r5, SIZE;					\
+	bl	bpf_internal_load_pointer_neg_helper;		\
+	/* R3 != 0 on success */				\
+	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
+	ld	r0, 16(r1);					\
+	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	mtlr	r0;						\
+	cmpldi	r3, 0;						\
+	beq	bpf_error_slow;	/* cr0 = EQ */			\
+	mr	r_addr, r3;					\
+	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	/* Great success! */
+
+bpf_slow_path_neg_word:
+	bpf_slow_path_neg_common(4)
+	lwz	r_A, (r_addr)
+	blr
+
+bpf_slow_path_neg_half:
+	bpf_slow_path_neg_common(2)
+	lhz	r_A, (r_addr)
+	blr
+
+bpf_slow_path_neg_byte:
+	bpf_slow_path_neg_common(1)
+	lbz	r_A, (r_addr)
+	blr
+
+bpf_slow_path_neg_byte_msh:
+	bpf_slow_path_neg_common(1)
+	lbz	r_X, (r_addr)
+	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
+	blr
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 73619d3..0019f70 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -400,12 +400,10 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			func = sk_load_byte;
 		common_load:
 			/*
-			 * Load from [K].  Reference with the (negative)
-			 * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
+			 * Load from [K]. Negative offsets are tested for
+			 * in the helper functions.
 			 */
 			ctx->seen |= SEEN_DATAREF;
-			if ((int)K < 0)
-				return -ENOTSUPP;
 			PPC_LI64(r_scratch1, func);
 			PPC_MTLR(r_scratch1);
 			PPC_LI32(r_addr, K);
@@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 		common_load_ind:
 			/*
 			 * Load from [X + K].  Negative offsets are tested for
-			 * in the helper functions, and result in a 'ret 0'.
+			 * in the helper functions.
 			 */
 			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
 			PPC_LI64(r_scratch1, func);
@@ -443,12 +441,6 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			break;
 
 		case BPF_S_LDX_B_MSH:
-			/*
-			 * x86 version drops packet (RET 0) when K<0, whereas
-			 * interpreter does allow K<0 (__load_pointer, special
-			 * ancillary data).  common_load returns ENOTSUPP if K<0,
-			 * so we fall back to interpreter & filter works.
-			 */
 			func = sk_load_byte_msh;
 			goto common_load;
 			break;
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6687022..1201783 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,13 @@
  * r9d : hlen = skb->len - skb->data_len
  */
 #define SKBDATA	%r8
-
-sk_load_word_ind:
-	.globl	sk_load_word_ind
-
-	add	%ebx,%esi	/* offset += X */
-#	test    %esi,%esi	/* if (offset < 0) goto bpf_error; */
-	js	bpf_error
+#define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
 
 sk_load_word:
 	.globl	sk_load_word
 
+	test	%esi,%esi
+	js	bpf_slow_path_word_neg
 	mov	%r9d,%eax		# hlen
 	sub	%esi,%eax		# hlen - offset
 	cmp	$3,%eax
@@ -37,16 +33,11 @@ sk_load_word:
 	bswap   %eax  			/* ntohl() */
 	ret
 
-
-sk_load_half_ind:
-	.globl sk_load_half_ind
-
-	add	%ebx,%esi	/* offset += X */
-	js	bpf_error
-
 sk_load_half:
 	.globl	sk_load_half
 
+	test	%esi,%esi
+	js	bpf_slow_path_half_neg
 	mov	%r9d,%eax
 	sub	%esi,%eax		#	hlen - offset
 	cmp	$1,%eax
@@ -55,14 +46,11 @@ sk_load_half:
 	rol	$8,%ax			# ntohs()
 	ret
 
-sk_load_byte_ind:
-	.globl sk_load_byte_ind
-	add	%ebx,%esi	/* offset += X */
-	js	bpf_error
-
 sk_load_byte:
 	.globl	sk_load_byte
 
+	test	%esi,%esi
+	js	bpf_slow_path_byte_neg
 	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
 	jle	bpf_slow_path_byte
 	movzbl	(SKBDATA,%rsi),%eax
@@ -73,10 +61,12 @@ sk_load_byte:
  *
  * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
  * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
  */
 ENTRY(sk_load_byte_msh)
 	CFI_STARTPROC
+	test	%esi,%esi
+	js	bpf_slow_path_byte_msh_neg
 	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
 	jle	bpf_slow_path_byte_msh
 	movzbl	(SKBDATA,%rsi),%ebx
@@ -138,3 +128,45 @@ bpf_slow_path_byte_msh:
 	shl	$2,%al
 	xchg	%eax,%ebx
 	ret
+
+#define bpf_slow_path_neg_common(SIZE)				\
+	cmp	SKF_MAX_NEG_OFF, %esi;	/* test range */	\
+	jl	bpf_error;	/* offset lower -> error  */	\
+	push	%rdi;	/* save skb */				\
+	push	%r9;						\
+	push	SKBDATA;					\
+/* rsi already has offset */					\
+	mov	$SIZE,%ecx;	/* size */			\
+	call	bpf_internal_load_pointer_neg_helper;		\
+	test	%rax,%rax;					\
+	pop	SKBDATA;					\
+	pop	%r9;						\
+	pop	%rdi;						\
+	jz	bpf_error
+
+bpf_slow_path_word_neg:
+	bpf_slow_path_neg_common(4)
+	mov	(%rax), %eax
+	bswap	%eax
+	ret
+
+bpf_slow_path_half_neg:
+	bpf_slow_path_neg_common(2)
+	mov	(%rax),%ax
+	rol	$8,%ax
+	movzwl	%ax,%eax
+	ret
+
+bpf_slow_path_byte_neg:
+	bpf_slow_path_neg_common(1)
+	movzbl	(%rax), %eax
+	ret
+
+bpf_slow_path_byte_msh_neg:
+	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
+	bpf_slow_path_neg_common(1)
+	movzbl	(%rax),%eax
+	and	$15,%al
+	shl	$2,%al
+	xchg	%eax,%ebx
+	ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5671752..3c4a454 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,6 @@ int bpf_jit_enable __read_mostly;
  * assembly code in arch/x86/net/bpf_jit.S
  */
 extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
 
 static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -475,10 +474,6 @@ void bpf_jit_compile(struct sk_filter *fp)
 			case BPF_S_LD_W_ABS:
 				func = sk_load_word;
 common_load:			seen |= SEEN_DATAREF;
-				if ((int)K < 0) {
-					/* Abort the JIT because __load_pointer() is needed. */
-					goto out;
-				}
 				t_offset = func - (image + addrs[i]);
 				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
 				EMIT1_off32(0xe8, t_offset); /* call */
@@ -490,27 +485,28 @@ common_load:			seen |= SEEN_DATAREF;
 				func = sk_load_byte;
 				goto common_load;
 			case BPF_S_LDX_B_MSH:
-				if ((int)K < 0) {
-					/* Abort the JIT because __load_pointer() is needed. */
-					goto out;
-				}
 				seen |= SEEN_DATAREF | SEEN_XREG;
 				t_offset = sk_load_byte_msh - (image + addrs[i]);
 				EMIT1_off32(0xbe, K);	/* mov imm32,%esi */
 				EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
 				break;
 			case BPF_S_LD_W_IND:
-				func = sk_load_word_ind;
+				func = sk_load_word;
 common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K);	/* mov imm32,%esi   */
+				if (K) {
+					EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+					EMIT(K, 4);
+				} else {
+					EMIT2(0x89,0xde); /* mov %ebx,%esi */
+				}
 				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
 				break;
 			case BPF_S_LD_H_IND:
-				func = sk_load_half_ind;
+				func = sk_load_half;
 				goto common_load_ind;
 			case BPF_S_LD_B_IND:
-				func = sk_load_byte_ind;
+				func = sk_load_byte;
 				goto common_load_ind;
 			case BPF_S_JMP_JA:
 				t_offset = addrs[i + K] - addrs[i];
diff --git a/net/core/filter.c b/net/core/filter.c
index 5dea452..04ca613 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -41,7 +41,7 @@
 #include <linux/ratelimit.h>
 
 /* No hurry in this branch */
-static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
+void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
 {
 	u8 *ptr = NULL;
 
@@ -54,13 +54,14 @@ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
 		return ptr;
 	return NULL;
 }
+EXPORT_SYMBOL(bpf_internal_load_pointer_neg_helper);
 
 static inline void *load_pointer(const struct sk_buff *skb, int k,
 				 unsigned int size, void *buffer)
 {
 	if (k >= 0)
 		return skb_header_pointer(skb, k, size, buffer);
-	return __load_pointer(skb, k, size);
+	return bpf_internal_load_pointer_neg_helper(skb, k, size);
 }
 
 /**

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2012-03-30 16:54 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-03-28 19:15 [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references Jan Seiffert
2012-03-28 20:05 ` Eric Dumazet
2012-03-28 20:26   ` Jan Seiffert
2012-03-28 20:39     ` Eric Dumazet
2012-03-29 11:54       ` Jan Seiffert
2012-03-29 13:57         ` Eric Dumazet
2012-03-30  9:11         ` Eric Dumazet
2012-03-30 13:42           ` Jan Seiffert
2012-03-30 14:08             ` Eric Dumazet
2012-03-30 14:11               ` Josh Boyer
2012-03-30 15:24               ` Matt Evans
2012-03-30 15:19             ` Matt Evans
2012-03-30 15:51               ` Jan Seiffert
2012-03-28 20:58 ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).