- * [PATCH bpf-next v2 01/16] bpf: allocate 0x06 to new eBPF instruction class JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 02/16] bpf: refactor verifier min/max code for condition jump Jiong Wang
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
The new eBPF instruction class JMP32 uses the reserved class number 0x6.
Kernel BPF ISA documentation updated accordingly.
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 Documentation/networking/filter.txt | 15 ++++++++-------
 include/uapi/linux/bpf.h            |  1 +
 tools/include/uapi/linux/bpf.h      |  1 +
 3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 2196b82..01603bc 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -865,7 +865,7 @@ Three LSB bits store instruction class which is one of:
   BPF_STX   0x03          BPF_STX   0x03
   BPF_ALU   0x04          BPF_ALU   0x04
   BPF_JMP   0x05          BPF_JMP   0x05
-  BPF_RET   0x06          [ class 6 unused, for future if needed ]
+  BPF_RET   0x06          BPF_JMP32 0x06
   BPF_MISC  0x07          BPF_ALU64 0x07
 
 When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
@@ -902,9 +902,9 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:
   BPF_ARSH  0xc0  /* eBPF only: sign extending shift right */
   BPF_END   0xd0  /* eBPF only: endianness conversion */
 
-If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
+If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of:
 
-  BPF_JA    0x00
+  BPF_JA    0x00  /* BPF_JMP only */
   BPF_JEQ   0x10
   BPF_JGT   0x20
   BPF_JGE   0x30
@@ -912,8 +912,8 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
   BPF_JNE   0x50  /* eBPF only: jump != */
   BPF_JSGT  0x60  /* eBPF only: signed '>' */
   BPF_JSGE  0x70  /* eBPF only: signed '>=' */
-  BPF_CALL  0x80  /* eBPF only: function call */
-  BPF_EXIT  0x90  /* eBPF only: function return */
+  BPF_CALL  0x80  /* eBPF BPF_JMP only: function call */
+  BPF_EXIT  0x90  /* eBPF BPF_JMP only: function return */
   BPF_JLT   0xa0  /* eBPF only: unsigned '<' */
   BPF_JLE   0xb0  /* eBPF only: unsigned '<=' */
   BPF_JSLT  0xc0  /* eBPF only: signed '<' */
@@ -936,8 +936,9 @@ Classic BPF wastes the whole BPF_RET class to represent a single 'ret'
 operation. Classic BPF_RET | BPF_K means copy imm32 into return register
 and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
 in eBPF means function exit only. The eBPF program needs to store return
-value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently
-unused and reserved for future use.
+value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
+BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
+operands for the comparisons instead.
 
 For load and store instructions the 8-bit 'code' field is divided as:
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 91c4388..a79bce4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -14,6 +14,7 @@
 /* Extended instruction set based on top of classic BPF */
 
 /* instruction classes */
+#define BPF_JMP32	0x06	/* jmp mode in word width */
 #define BPF_ALU64	0x07	/* alu mode in double word width */
 
 /* ld/ldx fields */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 91c4388..a79bce4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -14,6 +14,7 @@
 /* Extended instruction set based on top of classic BPF */
 
 /* instruction classes */
+#define BPF_JMP32	0x06	/* jmp mode in word width */
 #define BPF_ALU64	0x07	/* alu mode in double word width */
 
 /* ld/ldx fields */
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 02/16] bpf: refactor verifier min/max code for condition jump
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 01/16] bpf: allocate 0x06 to new eBPF instruction class JMP32 Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 03/16] bpf: verifier support JMP32 Jiong Wang
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
The current min/max code does both signed and unsigned comparisons against
the input argument "val" which is "u64" and there is explicit type casting
when the comparison is signed.
As we will need slightly more complexer type casting when JMP32 introduced,
it is better to host the signed type casting. This makes the code more
clean with ignorable runtime overhead.
Also, code for J*GE/GT/LT/LE and JEQ/JNE are very similar, this patch
combine them.
The main purpose for this refactor is to make sure the min/max code will
still be readable and with minimum code duplication after JMP32 introduced.
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 kernel/bpf/verifier.c | 172 +++++++++++++++++++++++++++++---------------------
 1 file changed, 99 insertions(+), 73 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ce87198..53f5135 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4033,9 +4033,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
  */
 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 {
+	s64 sval;
+
 	if (__is_pointer_value(false, reg))
 		return -1;
 
+	sval = (s64)val;
+
 	switch (opcode) {
 	case BPF_JEQ:
 		if (tnum_is_const(reg->var_off))
@@ -4058,9 +4062,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 			return 0;
 		break;
 	case BPF_JSGT:
-		if (reg->smin_value > (s64)val)
+		if (reg->smin_value > sval)
 			return 1;
-		else if (reg->smax_value < (s64)val)
+		else if (reg->smax_value < sval)
 			return 0;
 		break;
 	case BPF_JLT:
@@ -4070,9 +4074,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 			return 0;
 		break;
 	case BPF_JSLT:
-		if (reg->smax_value < (s64)val)
+		if (reg->smax_value < sval)
 			return 1;
-		else if (reg->smin_value >= (s64)val)
+		else if (reg->smin_value >= sval)
 			return 0;
 		break;
 	case BPF_JGE:
@@ -4082,9 +4086,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 			return 0;
 		break;
 	case BPF_JSGE:
-		if (reg->smin_value >= (s64)val)
+		if (reg->smin_value >= sval)
 			return 1;
-		else if (reg->smax_value < (s64)val)
+		else if (reg->smax_value < sval)
 			return 0;
 		break;
 	case BPF_JLE:
@@ -4094,9 +4098,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 			return 0;
 		break;
 	case BPF_JSLE:
-		if (reg->smax_value <= (s64)val)
+		if (reg->smax_value <= sval)
 			return 1;
-		else if (reg->smin_value > (s64)val)
+		else if (reg->smin_value > sval)
 			return 0;
 		break;
 	}
@@ -4113,6 +4117,8 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			    struct bpf_reg_state *false_reg, u64 val,
 			    u8 opcode)
 {
+	s64 sval;
+
 	/* If the dst_reg is a pointer, we can't learn anything about its
 	 * variable offset from the compare (unless src_reg were a pointer into
 	 * the same object, but we don't bother with that.
@@ -4122,19 +4128,22 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 	if (__is_pointer_value(false, false_reg))
 		return;
 
+	sval = (s64)val;
+
 	switch (opcode) {
 	case BPF_JEQ:
-		/* If this is false then we know nothing Jon Snow, but if it is
-		 * true then we know for sure.
-		 */
-		__mark_reg_known(true_reg, val);
-		break;
 	case BPF_JNE:
-		/* If this is true we know nothing Jon Snow, but if it is false
-		 * we know the value for sure;
+	{
+		struct bpf_reg_state *reg =
+			opcode == BPF_JEQ ? true_reg : false_reg;
+
+		/* For BPF_JEQ, if this is false we know nothing Jon Snow, but
+		 * if it is true we know the value for sure. Likewise for
+		 * BPF_JNE.
 		 */
-		__mark_reg_known(false_reg, val);
+		__mark_reg_known(reg, val);
 		break;
+	}
 	case BPF_JSET:
 		false_reg->var_off = tnum_and(false_reg->var_off,
 					      tnum_const(~val));
@@ -4142,38 +4151,46 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			true_reg->var_off = tnum_or(true_reg->var_off,
 						    tnum_const(val));
 		break;
-	case BPF_JGT:
-		false_reg->umax_value = min(false_reg->umax_value, val);
-		true_reg->umin_value = max(true_reg->umin_value, val + 1);
-		break;
-	case BPF_JSGT:
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
-		break;
-	case BPF_JLT:
-		false_reg->umin_value = max(false_reg->umin_value, val);
-		true_reg->umax_value = min(true_reg->umax_value, val - 1);
-		break;
-	case BPF_JSLT:
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
-		break;
 	case BPF_JGE:
-		false_reg->umax_value = min(false_reg->umax_value, val - 1);
-		true_reg->umin_value = max(true_reg->umin_value, val);
+	case BPF_JGT:
+	{
+		u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
+		u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
+
+		false_reg->umax_value = min(false_reg->umax_value, false_umax);
+		true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		break;
+	}
 	case BPF_JSGE:
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
+	case BPF_JSGT:
+	{
+		s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
+		s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
+
+		false_reg->smax_value = min(false_reg->smax_value, false_smax);
+		true_reg->smin_value = max(true_reg->smin_value, true_smin);
 		break;
+	}
 	case BPF_JLE:
-		false_reg->umin_value = max(false_reg->umin_value, val + 1);
-		true_reg->umax_value = min(true_reg->umax_value, val);
+	case BPF_JLT:
+	{
+		u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
+		u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
+
+		false_reg->umin_value = max(false_reg->umin_value, false_umin);
+		true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		break;
+	}
 	case BPF_JSLE:
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
+	case BPF_JSLT:
+	{
+		s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
+		s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
+
+		false_reg->smin_value = max(false_reg->smin_value, false_smin);
+		true_reg->smax_value = min(true_reg->smax_value, true_smax);
 		break;
+	}
 	default:
 		break;
 	}
@@ -4198,22 +4215,23 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 				struct bpf_reg_state *false_reg, u64 val,
 				u8 opcode)
 {
+	s64 sval;
+
 	if (__is_pointer_value(false, false_reg))
 		return;
 
+	sval = (s64)val;
+
 	switch (opcode) {
 	case BPF_JEQ:
-		/* If this is false then we know nothing Jon Snow, but if it is
-		 * true then we know for sure.
-		 */
-		__mark_reg_known(true_reg, val);
-		break;
 	case BPF_JNE:
-		/* If this is true we know nothing Jon Snow, but if it is false
-		 * we know the value for sure;
-		 */
-		__mark_reg_known(false_reg, val);
+	{
+		struct bpf_reg_state *reg =
+			opcode == BPF_JEQ ? true_reg : false_reg;
+
+		__mark_reg_known(reg, val);
 		break;
+	}
 	case BPF_JSET:
 		false_reg->var_off = tnum_and(false_reg->var_off,
 					      tnum_const(~val));
@@ -4221,38 +4239,46 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 			true_reg->var_off = tnum_or(true_reg->var_off,
 						    tnum_const(val));
 		break;
-	case BPF_JGT:
-		true_reg->umax_value = min(true_reg->umax_value, val - 1);
-		false_reg->umin_value = max(false_reg->umin_value, val);
-		break;
-	case BPF_JSGT:
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
-		break;
-	case BPF_JLT:
-		true_reg->umin_value = max(true_reg->umin_value, val + 1);
-		false_reg->umax_value = min(false_reg->umax_value, val);
-		break;
-	case BPF_JSLT:
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
-		break;
 	case BPF_JGE:
-		true_reg->umax_value = min(true_reg->umax_value, val);
-		false_reg->umin_value = max(false_reg->umin_value, val + 1);
+	case BPF_JGT:
+	{
+		u64 false_umin = opcode == BPF_JGT ? val    : val + 1;
+		u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
+
+		false_reg->umin_value = max(false_reg->umin_value, false_umin);
+		true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		break;
+	}
 	case BPF_JSGE:
-		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
-		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
+	case BPF_JSGT:
+	{
+		s64 false_smin = opcode == BPF_JSGT ? sval    : sval + 1;
+		s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
+
+		false_reg->smin_value = max(false_reg->smin_value, false_smin);
+		true_reg->smax_value = min(true_reg->smax_value, true_smax);
 		break;
+	}
 	case BPF_JLE:
-		true_reg->umin_value = max(true_reg->umin_value, val);
-		false_reg->umax_value = min(false_reg->umax_value, val - 1);
+	case BPF_JLT:
+	{
+		u64 false_umax = opcode == BPF_JLT ? val    : val - 1;
+		u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
+
+		false_reg->umax_value = min(false_reg->umax_value, false_umax);
+		true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		break;
+	}
 	case BPF_JSLE:
-		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
-		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
+	case BPF_JSLT:
+	{
+		s64 false_smax = opcode == BPF_JSLT ? sval    : sval - 1;
+		s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
+
+		false_reg->smax_value = min(false_reg->smax_value, false_smax);
+		true_reg->smin_value = max(true_reg->smin_value, true_smin);
 		break;
+	}
 	default:
 		break;
 	}
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 03/16] bpf: verifier support JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 01/16] bpf: allocate 0x06 to new eBPF instruction class JMP32 Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 02/16] bpf: refactor verifier min/max code for condition jump Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 04/16] bpf: disassembler " Jiong Wang
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
This patch teach verifier about the new BPF_JMP32 instruction class.
Verifier need to treat it similar as the existing BPF_JMP class.
A BPF_JMP32 insn needs to go through all checks that have been done on
BPF_JMP.
Also, verifier is doing runtime optimizations based on the extra info
conditional jump instruction could offer, especially when the comparison is
between constant and register that the value range of the register could be
improved based on the comparison results. These code are updated
accordingly.
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 kernel/bpf/core.c     |   3 +-
 kernel/bpf/verifier.c | 200 ++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 170 insertions(+), 33 deletions(-)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f908b93..f978cc0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -360,7 +360,8 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
 			insn++;
 		}
 		code = insn->code;
-		if (BPF_CLASS(code) != BPF_JMP ||
+		if ((BPF_CLASS(code) != BPF_JMP &&
+		     BPF_CLASS(code) != BPF_JMP32) ||
 		    BPF_OP(code) == BPF_EXIT)
 			continue;
 		/* Adjust offset of jmps if we cross patch boundaries. */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 53f5135..01a43b3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1095,7 +1095,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
 	for (i = 0; i < insn_cnt; i++) {
 		u8 code = insn[i].code;
 
-		if (BPF_CLASS(code) != BPF_JMP)
+		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
 			goto next;
 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
 			goto next;
@@ -4031,14 +4031,49 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
  *  0 - branch will not be taken and fall-through to next insn
  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
  */
-static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
+static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
+			   bool is_jmp32)
 {
+	struct bpf_reg_state reg_lo;
 	s64 sval;
 
 	if (__is_pointer_value(false, reg))
 		return -1;
 
-	sval = (s64)val;
+	if (is_jmp32) {
+		reg_lo = *reg;
+		reg = ®_lo;
+		/* For JMP32, only low 32 bits are compared, coerce_reg_to_size
+		 * could truncate high bits and update umin/umax according to
+		 * information of low bits.
+		 */
+		coerce_reg_to_size(reg, 4);
+		/* smin/smax need special handling. For example, after coerce,
+		 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
+		 * used as operand to JMP32. It is a negative number from s32's
+		 * point of view, while it is a positive number when seen as
+		 * s64. The smin/smax are kept as s64, therefore, when used with
+		 * JMP32, they need to be transformed into s32, then sign
+		 * extended back to s64.
+		 *
+		 * Also, smin/smax were copied from umin/umax. If umin/umax has
+		 * different sign bit, then min/max relationship doesn't
+		 * maintain after casting into s32, for this case, set smin/smax
+		 * to safest range.
+		 */
+		if ((reg->umax_value ^ reg->umin_value) &
+		    (1ULL << 31)) {
+			reg->smin_value = S32_MIN;
+			reg->smax_value = S32_MAX;
+		}
+		reg->smin_value = (s64)(s32)reg->smin_value;
+		reg->smax_value = (s64)(s32)reg->smax_value;
+
+		val = (u32)val;
+		sval = (s64)(s32)val;
+	} else {
+		sval = (s64)val;
+	}
 
 	switch (opcode) {
 	case BPF_JEQ:
@@ -4108,6 +4143,29 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 	return -1;
 }
 
+/* Generate min value of the high 32-bit from TNUM info. */
+static u64 gen_hi_min(struct tnum var)
+{
+	return var.value & ~0xffffffffULL;
+}
+
+/* Generate max value of the high 32-bit from TNUM info. */
+static u64 gen_hi_max(struct tnum var)
+{
+	return (var.value | var.mask) & ~0xffffffffULL;
+}
+
+/* Return true if VAL is compared with a s64 sign extended from s32, and they
+ * are with the same signedness.
+ */
+static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
+{
+	return ((s32)sval >= 0 &&
+		reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
+	       ((s32)sval < 0 &&
+		reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
+}
+
 /* Adjusts the register min/max values in the case that the dst_reg is the
  * variable register that we are working on, and src_reg is a constant or we're
  * simply doing a BPF_K check.
@@ -4115,7 +4173,7 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
  */
 static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			    struct bpf_reg_state *false_reg, u64 val,
-			    u8 opcode)
+			    u8 opcode, bool is_jmp32)
 {
 	s64 sval;
 
@@ -4128,7 +4186,8 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 	if (__is_pointer_value(false, false_reg))
 		return;
 
-	sval = (s64)val;
+	val = is_jmp32 ? (u32)val : val;
+	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
 
 	switch (opcode) {
 	case BPF_JEQ:
@@ -4141,7 +4200,15 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		 * if it is true we know the value for sure. Likewise for
 		 * BPF_JNE.
 		 */
-		__mark_reg_known(reg, val);
+		if (is_jmp32) {
+			u64 old_v = reg->var_off.value;
+			u64 hi_mask = ~0xffffffffULL;
+
+			reg->var_off.value = (old_v & hi_mask) | val;
+			reg->var_off.mask &= hi_mask;
+		} else {
+			__mark_reg_known(reg, val);
+		}
 		break;
 	}
 	case BPF_JSET:
@@ -4157,6 +4224,10 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
 		u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
 
+		if (is_jmp32) {
+			false_umax += gen_hi_max(false_reg->var_off);
+			true_umin += gen_hi_min(true_reg->var_off);
+		}
 		false_reg->umax_value = min(false_reg->umax_value, false_umax);
 		true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		break;
@@ -4167,6 +4238,11 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
 		s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
 
+		/* If the full s64 was not sign-extended from s32 then don't
+		 * deduct further info.
+		 */
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
 		false_reg->smax_value = min(false_reg->smax_value, false_smax);
 		true_reg->smin_value = max(true_reg->smin_value, true_smin);
 		break;
@@ -4177,6 +4253,10 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
 		u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
 
+		if (is_jmp32) {
+			false_umin += gen_hi_min(false_reg->var_off);
+			true_umax += gen_hi_max(true_reg->var_off);
+		}
 		false_reg->umin_value = max(false_reg->umin_value, false_umin);
 		true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		break;
@@ -4187,6 +4267,8 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
 		s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
 
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
 		false_reg->smin_value = max(false_reg->smin_value, false_smin);
 		true_reg->smax_value = min(true_reg->smax_value, true_smax);
 		break;
@@ -4213,14 +4295,15 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
  */
 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 				struct bpf_reg_state *false_reg, u64 val,
-				u8 opcode)
+				u8 opcode, bool is_jmp32)
 {
 	s64 sval;
 
 	if (__is_pointer_value(false, false_reg))
 		return;
 
-	sval = (s64)val;
+	val = is_jmp32 ? (u32)val : val;
+	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
 
 	switch (opcode) {
 	case BPF_JEQ:
@@ -4229,7 +4312,15 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 		struct bpf_reg_state *reg =
 			opcode == BPF_JEQ ? true_reg : false_reg;
 
-		__mark_reg_known(reg, val);
+		if (is_jmp32) {
+			u64 old_v = reg->var_off.value;
+			u64 hi_mask = ~0xffffffffULL;
+
+			reg->var_off.value = (old_v & hi_mask) | val;
+			reg->var_off.mask &= hi_mask;
+		} else {
+			__mark_reg_known(reg, val);
+		}
 		break;
 	}
 	case BPF_JSET:
@@ -4245,6 +4336,10 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 		u64 false_umin = opcode == BPF_JGT ? val    : val + 1;
 		u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
 
+		if (is_jmp32) {
+			false_umin += gen_hi_min(false_reg->var_off);
+			true_umax += gen_hi_max(true_reg->var_off);
+		}
 		false_reg->umin_value = max(false_reg->umin_value, false_umin);
 		true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		break;
@@ -4255,6 +4350,8 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 		s64 false_smin = opcode == BPF_JSGT ? sval    : sval + 1;
 		s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
 
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
 		false_reg->smin_value = max(false_reg->smin_value, false_smin);
 		true_reg->smax_value = min(true_reg->smax_value, true_smax);
 		break;
@@ -4265,6 +4362,10 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 		u64 false_umax = opcode == BPF_JLT ? val    : val - 1;
 		u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
 
+		if (is_jmp32) {
+			false_umax += gen_hi_max(false_reg->var_off);
+			true_umin += gen_hi_min(true_reg->var_off);
+		}
 		false_reg->umax_value = min(false_reg->umax_value, false_umax);
 		true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		break;
@@ -4275,6 +4376,8 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 		s64 false_smax = opcode == BPF_JSLT ? sval    : sval - 1;
 		s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
 
+		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
+			break;
 		false_reg->smax_value = min(false_reg->smax_value, false_smax);
 		true_reg->smin_value = max(true_reg->smin_value, true_smin);
 		break;
@@ -4416,6 +4519,10 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
 	if (BPF_SRC(insn->code) != BPF_X)
 		return false;
 
+	/* Pointers are always 64-bit. */
+	if (BPF_CLASS(insn->code) == BPF_JMP32)
+		return false;
+
 	switch (BPF_OP(insn->code)) {
 	case BPF_JGT:
 		if ((dst_reg->type == PTR_TO_PACKET &&
@@ -4508,16 +4615,18 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
 	struct bpf_reg_state *dst_reg, *other_branch_regs;
 	u8 opcode = BPF_OP(insn->code);
+	bool is_jmp32;
 	int err;
 
-	if (opcode > BPF_JSLE) {
-		verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
+	/* Only conditional jumps are expected to reach here. */
+	if (opcode == BPF_JA || opcode > BPF_JSLE) {
+		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
 		return -EINVAL;
 	}
 
 	if (BPF_SRC(insn->code) == BPF_X) {
 		if (insn->imm != 0) {
-			verbose(env, "BPF_JMP uses reserved fields\n");
+			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
 			return -EINVAL;
 		}
 
@@ -4533,7 +4642,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		}
 	} else {
 		if (insn->src_reg != BPF_REG_0) {
-			verbose(env, "BPF_JMP uses reserved fields\n");
+			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
 			return -EINVAL;
 		}
 	}
@@ -4544,9 +4653,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		return err;
 
 	dst_reg = ®s[insn->dst_reg];
+	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
 
 	if (BPF_SRC(insn->code) == BPF_K) {
-		int pred = is_branch_taken(dst_reg, insn->imm, opcode);
+		int pred = is_branch_taken(dst_reg, insn->imm, opcode,
+					   is_jmp32);
 
 		if (pred == 1) {
 			 /* only follow the goto, ignore fall-through */
@@ -4574,30 +4685,51 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	 * comparable.
 	 */
 	if (BPF_SRC(insn->code) == BPF_X) {
+		struct bpf_reg_state *src_reg = ®s[insn->src_reg];
+		struct bpf_reg_state lo_reg0 = *dst_reg;
+		struct bpf_reg_state lo_reg1 = *src_reg;
+		struct bpf_reg_state *src_lo, *dst_lo;
+
+		dst_lo = &lo_reg0;
+		src_lo = &lo_reg1;
+		coerce_reg_to_size(dst_lo, 4);
+		coerce_reg_to_size(src_lo, 4);
+
 		if (dst_reg->type == SCALAR_VALUE &&
-		    regs[insn->src_reg].type == SCALAR_VALUE) {
-			if (tnum_is_const(regs[insn->src_reg].var_off))
+		    src_reg->type == SCALAR_VALUE) {
+			if (tnum_is_const(src_reg->var_off) ||
+			    (is_jmp32 && tnum_is_const(src_lo->var_off)))
 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
-						dst_reg, regs[insn->src_reg].var_off.value,
-						opcode);
-			else if (tnum_is_const(dst_reg->var_off))
+						dst_reg,
+						is_jmp32
+						? src_lo->var_off.value
+						: src_reg->var_off.value,
+						opcode, is_jmp32);
+			else if (tnum_is_const(dst_reg->var_off) ||
+				 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
-						    ®s[insn->src_reg],
-						    dst_reg->var_off.value, opcode);
-			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
+						    src_reg,
+						    is_jmp32
+						    ? dst_lo->var_off.value
+						    : dst_reg->var_off.value,
+						    opcode, is_jmp32);
+			else if (!is_jmp32 &&
+				 (opcode == BPF_JEQ || opcode == BPF_JNE))
 				/* Comparing for equality, we can combine knowledge */
 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
 						    &other_branch_regs[insn->dst_reg],
-						    ®s[insn->src_reg],
-						    ®s[insn->dst_reg], opcode);
+						    src_reg, dst_reg, opcode);
 		}
 	} else if (dst_reg->type == SCALAR_VALUE) {
 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
-					dst_reg, insn->imm, opcode);
+					dst_reg, insn->imm, opcode, is_jmp32);
 	}
 
-	/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
-	if (BPF_SRC(insn->code) == BPF_K &&
+	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
+	 * NOTE: these optimizations below are related with pointer comparison
+	 *       which will never be JMP32.
+	 */
+	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
 	    reg_type_may_be_null(dst_reg->type)) {
 		/* Mark all identical registers in each branch as either
@@ -4926,7 +5058,8 @@ static int check_cfg(struct bpf_verifier_env *env)
 		goto check_state;
 	t = insn_stack[cur_stack - 1];
 
-	if (BPF_CLASS(insns[t].code) == BPF_JMP) {
+	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
+	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
 		u8 opcode = BPF_OP(insns[t].code);
 
 		if (opcode == BPF_EXIT) {
@@ -6082,7 +6215,7 @@ static int do_check(struct bpf_verifier_env *env)
 			if (err)
 				return err;
 
-		} else if (class == BPF_JMP) {
+		} else if (class == BPF_JMP || class == BPF_JMP32) {
 			u8 opcode = BPF_OP(insn->code);
 
 			if (opcode == BPF_CALL) {
@@ -6090,7 +6223,8 @@ static int do_check(struct bpf_verifier_env *env)
 				    insn->off != 0 ||
 				    (insn->src_reg != BPF_REG_0 &&
 				     insn->src_reg != BPF_PSEUDO_CALL) ||
-				    insn->dst_reg != BPF_REG_0) {
+				    insn->dst_reg != BPF_REG_0 ||
+				    class == BPF_JMP32) {
 					verbose(env, "BPF_CALL uses reserved fields\n");
 					return -EINVAL;
 				}
@@ -6106,7 +6240,8 @@ static int do_check(struct bpf_verifier_env *env)
 				if (BPF_SRC(insn->code) != BPF_K ||
 				    insn->imm != 0 ||
 				    insn->src_reg != BPF_REG_0 ||
-				    insn->dst_reg != BPF_REG_0) {
+				    insn->dst_reg != BPF_REG_0 ||
+				    class == BPF_JMP32) {
 					verbose(env, "BPF_JA uses reserved fields\n");
 					return -EINVAL;
 				}
@@ -6118,7 +6253,8 @@ static int do_check(struct bpf_verifier_env *env)
 				if (BPF_SRC(insn->code) != BPF_K ||
 				    insn->imm != 0 ||
 				    insn->src_reg != BPF_REG_0 ||
-				    insn->dst_reg != BPF_REG_0) {
+				    insn->dst_reg != BPF_REG_0 ||
+				    class == BPF_JMP32) {
 					verbose(env, "BPF_EXIT uses reserved fields\n");
 					return -EINVAL;
 				}
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 04/16] bpf: disassembler support JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (2 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 03/16] bpf: verifier support JMP32 Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 05/16] tools: bpftool: teach cfg code about JMP32 Jiong Wang
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
This patch teaches disassembler about JMP32. There are two places to
update:
  - Class 0x6 now used by BPF_JMP32, not "unused".
  - BPF_JMP32 need to show comparison operands properly.
    The disassemble format is to add an extra "(32)" before the operands if
    it is a sub-register. A better disassemble format for both JMP32 and
    ALU32 just show the register prefix as "w" instead of "r", this is the
    format using by LLVM assembler.
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 kernel/bpf/disasm.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index d6b7637..de73f55 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -67,7 +67,7 @@ const char *const bpf_class_string[8] = {
 	[BPF_STX]   = "stx",
 	[BPF_ALU]   = "alu",
 	[BPF_JMP]   = "jmp",
-	[BPF_RET]   = "BUG",
+	[BPF_JMP32] = "jmp32",
 	[BPF_ALU64] = "alu64",
 };
 
@@ -136,23 +136,22 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			else
 				print_bpf_end_insn(verbose, cbs->private_data, insn);
 		} else if (BPF_OP(insn->code) == BPF_NEG) {
-			verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n",
-				insn->code, insn->dst_reg,
-				class == BPF_ALU ? "(u32) " : "",
+			verbose(cbs->private_data, "(%02x) %c%d = -%c%d\n",
+				insn->code, class == BPF_ALU ? 'w' : 'r',
+				insn->dst_reg, class == BPF_ALU ? 'w' : 'r',
 				insn->dst_reg);
 		} else if (BPF_SRC(insn->code) == BPF_X) {
-			verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n",
-				insn->code, class == BPF_ALU ? "(u32) " : "",
+			verbose(cbs->private_data, "(%02x) %c%d %s %c%d\n",
+				insn->code, class == BPF_ALU ? 'w' : 'r',
 				insn->dst_reg,
 				bpf_alu_string[BPF_OP(insn->code) >> 4],
-				class == BPF_ALU ? "(u32) " : "",
+				class == BPF_ALU ? 'w' : 'r',
 				insn->src_reg);
 		} else {
-			verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n",
-				insn->code, class == BPF_ALU ? "(u32) " : "",
+			verbose(cbs->private_data, "(%02x) %c%d %s %d\n",
+				insn->code, class == BPF_ALU ? 'w' : 'r',
 				insn->dst_reg,
 				bpf_alu_string[BPF_OP(insn->code) >> 4],
-				class == BPF_ALU ? "(u32) " : "",
 				insn->imm);
 		}
 	} else if (class == BPF_STX) {
@@ -220,7 +219,7 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code);
 			return;
 		}
-	} else if (class == BPF_JMP) {
+	} else if (class == BPF_JMP32 || class == BPF_JMP) {
 		u8 opcode = BPF_OP(insn->code);
 
 		if (opcode == BPF_CALL) {
@@ -244,13 +243,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 		} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
 			verbose(cbs->private_data, "(%02x) exit\n", insn->code);
 		} else if (BPF_SRC(insn->code) == BPF_X) {
-			verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n",
-				insn->code, insn->dst_reg,
+			verbose(cbs->private_data,
+				"(%02x) if %c%d %s %c%d goto pc%+d\n",
+				insn->code, class == BPF_JMP32 ? 'w' : 'r',
+				insn->dst_reg,
 				bpf_jmp_string[BPF_OP(insn->code) >> 4],
+				class == BPF_JMP32 ? 'w' : 'r',
 				insn->src_reg, insn->off);
 		} else {
-			verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n",
-				insn->code, insn->dst_reg,
+			verbose(cbs->private_data,
+				"(%02x) if %c%d %s 0x%x goto pc%+d\n",
+				insn->code, class == BPF_JMP32 ? 'w' : 'r',
+				insn->dst_reg,
 				bpf_jmp_string[BPF_OP(insn->code) >> 4],
 				insn->imm, insn->off);
 		}
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 05/16] tools: bpftool: teach cfg code about JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (3 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 04/16] bpf: disassembler " Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 06/16] bpf: interpreter support for JMP32 Jiong Wang
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
The cfg code need to be aware of the new JMP32 instruction class so it
could partition functions correctly.
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 tools/bpf/bpftool/cfg.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
index 31f0db4..3e21f99 100644
--- a/tools/bpf/bpftool/cfg.c
+++ b/tools/bpf/bpftool/cfg.c
@@ -157,6 +157,11 @@ static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
 	return false;
 }
 
+static bool is_jmp_insn(u8 code)
+{
+	return BPF_CLASS(code) == BPF_JMP || BPF_CLASS(code) == BPF_JMP32;
+}
+
 static bool func_partition_bb_head(struct func_node *func)
 {
 	struct bpf_insn *cur, *end;
@@ -170,7 +175,7 @@ static bool func_partition_bb_head(struct func_node *func)
 		return true;
 
 	for (; cur <= end; cur++) {
-		if (BPF_CLASS(cur->code) == BPF_JMP) {
+		if (is_jmp_insn(cur->code)) {
 			u8 opcode = BPF_OP(cur->code);
 
 			if (opcode == BPF_EXIT || opcode == BPF_CALL)
@@ -296,7 +301,7 @@ static bool func_add_bb_edges(struct func_node *func)
 		e->src = bb;
 
 		insn = bb->tail;
-		if (BPF_CLASS(insn->code) != BPF_JMP ||
+		if (!is_jmp_insn(insn->code) ||
 		    BPF_OP(insn->code) == BPF_EXIT) {
 			e->dst = bb_next(bb);
 			e->flags |= EDGE_FLAG_FALLTHROUGH;
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 06/16] bpf: interpreter support for JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (4 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 05/16] tools: bpftool: teach cfg code about JMP32 Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 07/16] bpf: JIT blinds support JMP32 Jiong Wang
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
This patch implements interpreting new JMP32 instructions.
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 kernel/bpf/core.c | 197 +++++++++++++++++-------------------------------------
 1 file changed, 63 insertions(+), 134 deletions(-)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f978cc0..199612e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1131,6 +1131,31 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
 	INSN_2(JMP, CALL),			\
 	/* Exit instruction. */			\
 	INSN_2(JMP, EXIT),			\
+	/* 32-bit Jump instructions. */		\
+	/*   Register based. */			\
+	INSN_3(JMP32, JEQ,  X),			\
+	INSN_3(JMP32, JNE,  X),			\
+	INSN_3(JMP32, JGT,  X),			\
+	INSN_3(JMP32, JLT,  X),			\
+	INSN_3(JMP32, JGE,  X),			\
+	INSN_3(JMP32, JLE,  X),			\
+	INSN_3(JMP32, JSGT, X),			\
+	INSN_3(JMP32, JSLT, X),			\
+	INSN_3(JMP32, JSGE, X),			\
+	INSN_3(JMP32, JSLE, X),			\
+	INSN_3(JMP32, JSET, X),			\
+	/*   Immediate based. */		\
+	INSN_3(JMP32, JEQ,  K),			\
+	INSN_3(JMP32, JNE,  K),			\
+	INSN_3(JMP32, JGT,  K),			\
+	INSN_3(JMP32, JLT,  K),			\
+	INSN_3(JMP32, JGE,  K),			\
+	INSN_3(JMP32, JLE,  K),			\
+	INSN_3(JMP32, JSGT, K),			\
+	INSN_3(JMP32, JSLT, K),			\
+	INSN_3(JMP32, JSGE, K),			\
+	INSN_3(JMP32, JSLE, K),			\
+	INSN_3(JMP32, JSET, K),			\
 	/* Jump instructions. */		\
 	/*   Register based. */			\
 	INSN_3(JMP, JEQ,  X),			\
@@ -1391,145 +1416,49 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
 out:
 		CONT;
 	}
-	/* JMP */
 	JMP_JA:
 		insn += insn->off;
 		CONT;
-	JMP_JEQ_X:
-		if (DST == SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JEQ_K:
-		if (DST == IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JNE_X:
-		if (DST != SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JNE_K:
-		if (DST != IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGT_X:
-		if (DST > SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGT_K:
-		if (DST > IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLT_X:
-		if (DST < SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLT_K:
-		if (DST < IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGE_X:
-		if (DST >= SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JGE_K:
-		if (DST >= IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLE_X:
-		if (DST <= SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JLE_K:
-		if (DST <= IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGT_X:
-		if (((s64) DST) > ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGT_K:
-		if (((s64) DST) > ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLT_X:
-		if (((s64) DST) < ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLT_K:
-		if (((s64) DST) < ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGE_X:
-		if (((s64) DST) >= ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSGE_K:
-		if (((s64) DST) >= ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLE_X:
-		if (((s64) DST) <= ((s64) SRC)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSLE_K:
-		if (((s64) DST) <= ((s64) IMM)) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSET_X:
-		if (DST & SRC) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
-	JMP_JSET_K:
-		if (DST & IMM) {
-			insn += insn->off;
-			CONT_JMP;
-		}
-		CONT;
 	JMP_EXIT:
 		return BPF_R0;
-
+	/* JMP */
+#define COND_JMP(SIGN, OPCODE, CMP_OP)				\
+	JMP_##OPCODE##_X:					\
+		if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;						\
+	JMP32_##OPCODE##_X:					\
+		if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;						\
+	JMP_##OPCODE##_K:					\
+		if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;						\
+	JMP32_##OPCODE##_K:					\
+		if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) {	\
+			insn += insn->off;			\
+			CONT_JMP;				\
+		}						\
+		CONT;
+	COND_JMP(u, JEQ, ==)
+	COND_JMP(u, JNE, !=)
+	COND_JMP(u, JGT, >)
+	COND_JMP(u, JLT, <)
+	COND_JMP(u, JGE, >=)
+	COND_JMP(u, JLE, <=)
+	COND_JMP(u, JSET, &)
+	COND_JMP(s, JSGT, >)
+	COND_JMP(s, JSLT, <)
+	COND_JMP(s, JSGE, >=)
+	COND_JMP(s, JSLE, <=)
+#undef COND_JMP
 	/* STX and ST and LDX*/
 #define LDST(SIZEOP, SIZE)						\
 	STX_MEM_##SIZEOP:						\
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 07/16] bpf: JIT blinds support JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (5 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 06/16] bpf: interpreter support for JMP32 Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 08/16] bpf: functional and min/max reasoning unit tests for JMP32 Jiong Wang
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
This patch adds JIT blinds support for JMP32.
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 include/linux/filter.h | 20 ++++++++++++++++++++
 kernel/bpf/core.c      | 21 +++++++++++++++++++++
 2 files changed, 41 insertions(+)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ad106d8..248dbc5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -277,6 +277,26 @@ struct sock_reuseport;
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_X,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_K,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
 /* Unconditional jumps, goto pc + off16 */
 
 #define BPF_JMP_A(OFF)						\
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 199612e..84a8b25 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -935,6 +935,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
 		break;
 
+	case BPF_JMP32 | BPF_JEQ  | BPF_K:
+	case BPF_JMP32 | BPF_JNE  | BPF_K:
+	case BPF_JMP32 | BPF_JGT  | BPF_K:
+	case BPF_JMP32 | BPF_JLT  | BPF_K:
+	case BPF_JMP32 | BPF_JGE  | BPF_K:
+	case BPF_JMP32 | BPF_JLE  | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		/* Accommodate for extra offset in case of a backjump. */
+		off = from->off;
+		if (off < 0)
+			off -= 2;
+		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
+		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
+		*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
+				      off);
+		break;
+
 	case BPF_LD | BPF_IMM | BPF_DW:
 		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
 		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 08/16] bpf: functional and min/max reasoning unit tests for JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (6 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 07/16] bpf: JIT blinds support JMP32 Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-23 14:29   ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 09/16] x86_64: bpf: implement jitting of JMP32 Jiong Wang
                   ` (7 subsequent siblings)
  15 siblings, 1 reply; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
This patch adds unit tests for new JMP32 instructions.
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 samples/bpf/bpf_insn.h                      |  20 +
 tools/include/linux/filter.h                |  20 +
 tools/testing/selftests/bpf/test_verifier.c | 786 +++++++++++++++++++++++++++-
 3 files changed, 802 insertions(+), 24 deletions(-)
diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h
index 20dc5ce..5442379 100644
--- a/samples/bpf/bpf_insn.h
+++ b/samples/bpf/bpf_insn.h
@@ -164,6 +164,16 @@ struct bpf_insn;
 		.off   = OFF,					\
 		.imm   = 0 })
 
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_X,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
 
 #define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
@@ -174,6 +184,16 @@ struct bpf_insn;
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_K,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
 /* Raw code statement block */
 
 #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)			\
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index af55acf..cce0b02 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -199,6 +199,16 @@
 		.off   = OFF,					\
 		.imm   = 0 })
 
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_X,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
 
 #define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
@@ -209,6 +219,16 @@
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_K,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
 /* Unconditional jumps, goto pc + off16 */
 
 #define BPF_JMP_A(OFF)						\
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 2fd90d4..7bf8003 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -211,6 +211,42 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 	BPF_MOV64_IMM(BPF_REG_5, 0),					\
 	BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp)
 
+/* BPF_DIRECT_PKT_R2 contains 7 instructions, it initializes default return
+ * value into 0 and does necessary preparation for direct packet access
+ * through r2. The allowed access range is 8 bytes.
+ */
+#define BPF_DIRECT_PKT_R2						\
+	BPF_MOV64_IMM(BPF_REG_0, 0),					\
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,			\
+		    offsetof(struct __sk_buff, data)),			\
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,			\
+		    offsetof(struct __sk_buff, data_end)),		\
+	BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),				\
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),				\
+	BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1),			\
+	BPF_EXIT_INSN()
+
+/* BPF_RAND_UEXT_R7 contains 4 instructions, it initializes R7 into a random
+ * positive u32, and zero-extend it into 64-bit.
+ */
+#define BPF_RAND_UEXT_R7						\
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,			\
+		     BPF_FUNC_get_prandom_u32),				\
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),				\
+	BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 33),				\
+	BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 33)
+
+/* BPF_RAND_SEXT_R7 contains 5 instructions, it initializes R7 into a random
+ * negative u32, and sign-extend it into 64-bit.
+ */
+#define BPF_RAND_SEXT_R7						\
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,			\
+		     BPF_FUNC_get_prandom_u32),				\
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),				\
+	BPF_ALU64_IMM(BPF_OR, BPF_REG_7, 0x80000000)			\
+	BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 32),				\
+	BPF_ALU64_IMM(BPF_ARSH, BPF_REG_7, 32),
+
 static struct bpf_test tests[] = {
 	{
 		"add+sub+mul",
@@ -15411,18 +15447,7 @@ static struct bpf_test tests[] = {
 	{
 		"jset: functional",
 		.insns = {
-			/* r0 = 0 */
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			/* prep for direct packet access via r2 */
-			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
-				    offsetof(struct __sk_buff, data)),
-			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
-				    offsetof(struct __sk_buff, data_end)),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
-			BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1),
-			BPF_EXIT_INSN(),
-
+			BPF_DIRECT_PKT_R2,
 			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
 
 			/* reg, bit 63 or bit 0 set, taken */
@@ -15478,18 +15503,7 @@ static struct bpf_test tests[] = {
 	{
 		"jset: sign-extend",
 		.insns = {
-			/* r0 = 0 */
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			/* prep for direct packet access via r2 */
-			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
-				    offsetof(struct __sk_buff, data)),
-			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
-				    offsetof(struct __sk_buff, data_end)),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
-			BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1),
-			BPF_EXIT_INSN(),
-
+			BPF_DIRECT_PKT_R2,
 			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
 
 			BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1),
@@ -15599,6 +15613,730 @@ static struct bpf_test tests[] = {
 		.result_unpriv = ACCEPT,
 		.result = ACCEPT,
 	},
+	{
+		"jset32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			/* reg, high bits shouldn't be tested */
+			BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, -2, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_EXIT_INSN(),
+
+			BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { 1ULL << 63, }
+			},
+			{ .retval = 2,
+			  .data64 = { 1, }
+			},
+			{ .retval = 2,
+			  .data64 = { 1ULL << 63 | 1, }
+			},
+		},
+	},
+	{
+		"jset32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_LD_IMM64(BPF_REG_8, 0x8000000000000000),
+			BPF_JMP32_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_EXIT_INSN(),
+
+			BPF_LD_IMM64(BPF_REG_8, 0x8000000000000001),
+			BPF_JMP32_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { 1ULL << 63, }
+			},
+			{ .retval = 2,
+			  .data64 = { 1, }
+			},
+			{ .retval = 2,
+			  .data64 = { 1ULL << 63 | 1, }
+			},
+		},
+	},
+	{
+		"jset32: min/max deduction",
+		.insns = {
+			BPF_RAND_UEXT_R7,
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"jeq32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, -1, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 2,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { -2, }
+			},
+			{ .retval = 2,
+			  .data64 = { -1, }
+			},
+		},
+	},
+	{
+		"jeq32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_LD_IMM64(BPF_REG_8, 0x7000000000000001),
+			BPF_JMP32_REG(BPF_JEQ, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { 2, }
+			},
+			{ .retval = 2,
+			  .data64 = { 1, }
+			},
+			{ .retval = 2,
+			  .data64 = { 1ULL << 63 | 1, }
+			},
+		},
+	},
+	{
+		"jeq32: min/max deduction",
+		.insns = {
+			BPF_RAND_UEXT_R7,
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"jne32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, -1, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 2,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { 1, }
+			},
+			{ .retval = 0,
+			  .data64 = { -1, }
+			},
+		},
+	},
+	{
+		"jne32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_LD_IMM64(BPF_REG_8, 0x8000000000000001),
+			BPF_JMP32_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { 1, }
+			},
+			{ .retval = 2,
+			  .data64 = { 2, }
+			},
+			{ .retval = 2,
+			  .data64 = { 1ULL << 63 | 2, }
+			},
+		},
+	},
+	{
+		"jne32: min/max deduction",
+		.insns = {
+			BPF_RAND_UEXT_R7,
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"jge32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, UINT_MAX - 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { UINT_MAX, }
+			},
+			{ .retval = 2,
+			  .data64 = { UINT_MAX - 1, }
+			},
+			{ .retval = 0,
+			  .data64 = { 0, }
+			},
+		},
+	},
+	{
+		"jge32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LD_IMM64(BPF_REG_8, UINT_MAX | 1ULL << 32),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { UINT_MAX, }
+			},
+			{ .retval = 0,
+			  .data64 = { INT_MAX, }
+			},
+			{ .retval = 0,
+			  .data64 = { (UINT_MAX - 1) | 2ULL << 32, }
+			},
+		},
+	},
+	{
+		"jge32: min/max deduction",
+		.insns = {
+			BPF_RAND_UEXT_R7,
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32),
+			BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jgt32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JGT, BPF_REG_7, UINT_MAX - 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { UINT_MAX, }
+			},
+			{ .retval = 0,
+			  .data64 = { UINT_MAX - 1, }
+			},
+			{ .retval = 0,
+			  .data64 = { 0, }
+			},
+		},
+	},
+	{
+		"jgt32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LD_IMM64(BPF_REG_8, (UINT_MAX - 1) | 1ULL << 32),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { UINT_MAX, }
+			},
+			{ .retval = 0,
+			  .data64 = { UINT_MAX - 1, }
+			},
+			{ .retval = 0,
+			  .data64 = { (UINT_MAX - 1) | 2ULL << 32, }
+			},
+		},
+	},
+	{
+		"jgt32: min/max deduction",
+		.insns = {
+			BPF_RAND_UEXT_R7,
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32),
+			BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jle32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, INT_MAX, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { INT_MAX - 1, }
+			},
+			{ .retval = 0,
+			  .data64 = { UINT_MAX, }
+			},
+			{ .retval = 2,
+			  .data64 = { INT_MAX, }
+			},
+		},
+	},
+	{
+		"jle32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LD_IMM64(BPF_REG_8, (INT_MAX - 1) | 2ULL << 32),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { INT_MAX | 1ULL << 32, }
+			},
+			{ .retval = 2,
+			  .data64 = { INT_MAX - 2, }
+			},
+			{ .retval = 0,
+			  .data64 = { UINT_MAX, }
+			},
+		},
+	},
+	{
+		"jle32: min/max deduction",
+		.insns = {
+			BPF_RAND_UEXT_R7,
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32),
+			BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jlt32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JLT, BPF_REG_7, INT_MAX, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { INT_MAX, }
+			},
+			{ .retval = 0,
+			  .data64 = { UINT_MAX, }
+			},
+			{ .retval = 2,
+			  .data64 = { INT_MAX - 1, }
+			},
+		},
+	},
+	{
+		"jlt32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LD_IMM64(BPF_REG_8, INT_MAX | 2ULL << 32),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { INT_MAX | 1ULL << 32, }
+			},
+			{ .retval = 0,
+			  .data64 = { UINT_MAX, }
+			},
+			{ .retval = 2,
+			  .data64 = { (INT_MAX - 1) | 3ULL << 32, }
+			},
+		},
+	},
+	{
+		"jlt32: min/max deduction",
+		.insns = {
+			BPF_RAND_UEXT_R7,
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32),
+			BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jsge32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, -1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { 0, }
+			},
+			{ .retval = 2,
+			  .data64 = { -1, }
+			},
+			{ .retval = 0,
+			  .data64 = { -2, }
+			},
+		},
+	},
+	{
+		"jsge32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LD_IMM64(BPF_REG_8, (__u32)-1 | 2ULL << 32),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { -1, }
+			},
+			{ .retval = 2,
+			  .data64 = { 0x7fffffff | 1ULL << 32, }
+			},
+			{ .retval = 0,
+			  .data64 = { -2, }
+			},
+		},
+	},
+	{
+		"jsge32: min/max deduction",
+		.insns = {
+			BPF_RAND_UEXT_R7,
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32),
+			BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jsgt32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JSGT, BPF_REG_7, -1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { (__u32)-2, }
+			},
+			{ .retval = 0,
+			  .data64 = { -1, }
+			},
+			{ .retval = 2,
+			  .data64 = { 1, }
+			},
+		},
+	},
+	{
+		"jsgt32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LD_IMM64(BPF_REG_8, 0x7ffffffe | 1ULL << 32),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 0,
+			  .data64 = { 0x7ffffffe, }
+			},
+			{ .retval = 0,
+			  .data64 = { 0x1ffffffffULL, }
+			},
+			{ .retval = 2,
+			  .data64 = { 0x7fffffff, }
+			},
+		},
+	},
+	{
+		"jsgt32: min/max deduction",
+		.insns = {
+			BPF_RAND_SEXT_R7,
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_8, (__u32)(-2) | 1ULL << 32),
+			BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jsle32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JSLE, BPF_REG_7, -1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { (__u32)-2, }
+			},
+			{ .retval = 2,
+			  .data64 = { -1, }
+			},
+			{ .retval = 0,
+			  .data64 = { 1, }
+			},
+		},
+	},
+	{
+		"jsle32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LD_IMM64(BPF_REG_8, 0x7ffffffe | 1ULL << 32),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { 0x7ffffffe, }
+			},
+			{ .retval = 2,
+			  .data64 = { (__u32)-1, }
+			},
+			{ .retval = 0,
+			  .data64 = { 0x7fffffff | 2ULL << 32, }
+			},
+		},
+	},
+	{
+		"jsle32: min/max deduction",
+		.insns = {
+			BPF_RAND_UEXT_R7,
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32),
+			BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jslt32: BPF_K",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { (__u32)-2, }
+			},
+			{ .retval = 0,
+			  .data64 = { -1, }
+			},
+			{ .retval = 0,
+			  .data64 = { 1, }
+			},
+		},
+	},
+	{
+		"jslt32: BPF_X",
+		.insns = {
+			BPF_DIRECT_PKT_R2,
+			BPF_LD_IMM64(BPF_REG_8, 0x7fffffff | 1ULL << 32),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0),
+			BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.runs = 3,
+		.retvals = {
+			{ .retval = 2,
+			  .data64 = { 0x7ffffffe, }
+			},
+			{ .retval = 2,
+			  .data64 = { 0xffffffff, }
+			},
+			{ .retval = 0,
+			  .data64 = { 0x7fffffff | 2ULL << 32, }
+			},
+		},
+	},
+	{
+		"jslt32: min/max deduction",
+		.insns = {
+			BPF_RAND_SEXT_R7,
+			BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_8, (__u32)(-1) | 1ULL << 32),
+			BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * Re: [PATCH bpf-next v2 08/16] bpf: functional and min/max reasoning unit tests for JMP32
  2019-01-21 13:15 ` [PATCH bpf-next v2 08/16] bpf: functional and min/max reasoning unit tests for JMP32 Jiong Wang
@ 2019-01-23 14:29   ` Jiong Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-23 14:29 UTC (permalink / raw)
  To: Jiong Wang; +Cc: ast, daniel, netdev, oss-drivers
Jiong Wang writes:
<snip>
> +/* BPF_RAND_SEXT_R7 contains 5 instructions, it initializes R7 into a random
> + * negative u32, and sign-extend it into 64-bit.
> + */
> +#define BPF_RAND_SEXT_R7						\
> +	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,			\
> +		     BPF_FUNC_get_prandom_u32),				\
> +	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),				\
> +	BPF_ALU64_IMM(BPF_OR, BPF_REG_7, 0x80000000)			\
                                                    ^~~~ ',' missing
> +	BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 32),				\
> +	BPF_ALU64_IMM(BPF_ARSH, BPF_REG_7, 32),
                                              ^~~~ ',' should be removed
Apology, just found there is a minor fix on testcase that I forget to
commit before generating patch set. One ',' is missing and one ',' should
be removed as described above.
I will correct this in v3 together with other reviews if any.
Regards,
Jiong
^ permalink raw reply	[flat|nested] 20+ messages in thread
 
- * [PATCH bpf-next v2 09/16] x86_64: bpf: implement jitting of JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (7 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 08/16] bpf: functional and min/max reasoning unit tests for JMP32 Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 10/16] x32: " Jiong Wang
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
This patch implements code-gen for new JMP32 instructions on x86_64.
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 arch/x86/net/bpf_jit_comp.c | 46 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5542303..afabf59 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -881,20 +881,41 @@ xadd:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_JSLT | BPF_X:
 		case BPF_JMP | BPF_JSGE | BPF_X:
 		case BPF_JMP | BPF_JSLE | BPF_X:
+		case BPF_JMP32 | BPF_JEQ | BPF_X:
+		case BPF_JMP32 | BPF_JNE | BPF_X:
+		case BPF_JMP32 | BPF_JGT | BPF_X:
+		case BPF_JMP32 | BPF_JLT | BPF_X:
+		case BPF_JMP32 | BPF_JGE | BPF_X:
+		case BPF_JMP32 | BPF_JLE | BPF_X:
+		case BPF_JMP32 | BPF_JSGT | BPF_X:
+		case BPF_JMP32 | BPF_JSLT | BPF_X:
+		case BPF_JMP32 | BPF_JSGE | BPF_X:
+		case BPF_JMP32 | BPF_JSLE | BPF_X:
 			/* cmp dst_reg, src_reg */
-			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
-			      add_2reg(0xC0, dst_reg, src_reg));
+			if (BPF_CLASS(insn->code) == BPF_JMP)
+				EMIT1(add_2mod(0x48, dst_reg, src_reg));
+			else if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, dst_reg, src_reg));
+			EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
 			goto emit_cond_jmp;
 
 		case BPF_JMP | BPF_JSET | BPF_X:
+		case BPF_JMP32 | BPF_JSET | BPF_X:
 			/* test dst_reg, src_reg */
-			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
-			      add_2reg(0xC0, dst_reg, src_reg));
+			if (BPF_CLASS(insn->code) == BPF_JMP)
+				EMIT1(add_2mod(0x48, dst_reg, src_reg));
+			else if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, dst_reg, src_reg));
+			EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
 			goto emit_cond_jmp;
 
 		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP32 | BPF_JSET | BPF_K:
 			/* test dst_reg, imm32 */
-			EMIT1(add_1mod(0x48, dst_reg));
+			if (BPF_CLASS(insn->code) == BPF_JMP)
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
 			EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
 			goto emit_cond_jmp;
 
@@ -908,8 +929,21 @@ xadd:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_JSLT | BPF_K:
 		case BPF_JMP | BPF_JSGE | BPF_K:
 		case BPF_JMP | BPF_JSLE | BPF_K:
+		case BPF_JMP32 | BPF_JEQ | BPF_K:
+		case BPF_JMP32 | BPF_JNE | BPF_K:
+		case BPF_JMP32 | BPF_JGT | BPF_K:
+		case BPF_JMP32 | BPF_JLT | BPF_K:
+		case BPF_JMP32 | BPF_JGE | BPF_K:
+		case BPF_JMP32 | BPF_JLE | BPF_K:
+		case BPF_JMP32 | BPF_JSGT | BPF_K:
+		case BPF_JMP32 | BPF_JSLT | BPF_K:
+		case BPF_JMP32 | BPF_JSGE | BPF_K:
+		case BPF_JMP32 | BPF_JSLE | BPF_K:
 			/* cmp dst_reg, imm8/32 */
-			EMIT1(add_1mod(0x48, dst_reg));
+			if (BPF_CLASS(insn->code) == BPF_JMP)
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
 
 			if (is_imm8(imm32))
 				EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 10/16] x32: bpf: implement jitting of JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (8 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 09/16] x86_64: bpf: implement jitting of JMP32 Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 11/16] arm64: " Jiong Wang
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang, Wang YanQing
This patch implements code-gen for new JMP32 instructions on x32.
Also fixed several reverse xmas tree coding style issues as I am there.
Cc: Wang YanQing <udknight@gmail.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 arch/x86/net/bpf_jit_comp32.c | 121 +++++++++++++++++++++++++++++-------------
 1 file changed, 85 insertions(+), 36 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 8f6cc71..0d9cdff 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2072,7 +2072,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		case BPF_JMP | BPF_JSGT | BPF_X:
 		case BPF_JMP | BPF_JSLE | BPF_X:
 		case BPF_JMP | BPF_JSLT | BPF_X:
-		case BPF_JMP | BPF_JSGE | BPF_X: {
+		case BPF_JMP | BPF_JSGE | BPF_X:
+		case BPF_JMP32 | BPF_JEQ | BPF_X:
+		case BPF_JMP32 | BPF_JNE | BPF_X:
+		case BPF_JMP32 | BPF_JGT | BPF_X:
+		case BPF_JMP32 | BPF_JLT | BPF_X:
+		case BPF_JMP32 | BPF_JGE | BPF_X:
+		case BPF_JMP32 | BPF_JLE | BPF_X:
+		case BPF_JMP32 | BPF_JSGT | BPF_X:
+		case BPF_JMP32 | BPF_JSLE | BPF_X:
+		case BPF_JMP32 | BPF_JSLT | BPF_X:
+		case BPF_JMP32 | BPF_JSGE | BPF_X: {
+			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
@@ -2081,25 +2092,35 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			if (dstk) {
 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 				      STACK_VAR(dst_lo));
-				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
-				      STACK_VAR(dst_hi));
+				if (is_jmp64)
+					EMIT3(0x8B,
+					      add_2reg(0x40, IA32_EBP,
+						       IA32_EDX),
+					      STACK_VAR(dst_hi));
 			}
 
 			if (sstk) {
 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
 				      STACK_VAR(src_lo));
-				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
-				      STACK_VAR(src_hi));
+				if (is_jmp64)
+					EMIT3(0x8B,
+					      add_2reg(0x40, IA32_EBP,
+						       IA32_EBX),
+					      STACK_VAR(src_hi));
 			}
 
-			/* cmp dreg_hi,sreg_hi */
-			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
-			EMIT2(IA32_JNE, 2);
+			if (is_jmp64) {
+				/* cmp dreg_hi,sreg_hi */
+				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+				EMIT2(IA32_JNE, 2);
+			}
 			/* cmp dreg_lo,sreg_lo */
 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
 			goto emit_cond_jmp;
 		}
-		case BPF_JMP | BPF_JSET | BPF_X: {
+		case BPF_JMP | BPF_JSET | BPF_X:
+		case BPF_JMP32 | BPF_JSET | BPF_X: {
+			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
@@ -2108,15 +2129,21 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			if (dstk) {
 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 				      STACK_VAR(dst_lo));
-				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
-				      STACK_VAR(dst_hi));
+				if (is_jmp64)
+					EMIT3(0x8B,
+					      add_2reg(0x40, IA32_EBP,
+						       IA32_EDX),
+					      STACK_VAR(dst_hi));
 			}
 
 			if (sstk) {
 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
 				      STACK_VAR(src_lo));
-				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
-				      STACK_VAR(src_hi));
+				if (is_jmp64)
+					EMIT3(0x8B,
+					      add_2reg(0x40, IA32_EBP,
+						       IA32_EBX),
+					      STACK_VAR(src_hi));
 			}
 			/* and dreg_lo,sreg_lo */
 			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
@@ -2126,32 +2153,39 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
 			goto emit_cond_jmp;
 		}
-		case BPF_JMP | BPF_JSET | BPF_K: {
-			u32 hi;
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP32 | BPF_JSET | BPF_K: {
+			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 			u8 sreg_lo = IA32_ECX;
 			u8 sreg_hi = IA32_EBX;
+			u32 hi;
 
 			if (dstk) {
 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 				      STACK_VAR(dst_lo));
-				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
-				      STACK_VAR(dst_hi));
+				if (is_jmp64)
+					EMIT3(0x8B,
+					      add_2reg(0x40, IA32_EBP,
+						       IA32_EDX),
+					      STACK_VAR(dst_hi));
 			}
-			hi = imm32 & (1<<31) ? (u32)~0 : 0;
 
 			/* mov ecx,imm32 */
-			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
-			/* mov ebx,imm32 */
-			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+			EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32);
 
 			/* and dreg_lo,sreg_lo */
 			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
-			/* and dreg_hi,sreg_hi */
-			EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
-			/* or dreg_lo,dreg_hi */
-			EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+			if (is_jmp64) {
+				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
+				/* mov ebx,imm32 */
+				EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi);
+				/* and dreg_hi,sreg_hi */
+				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+				/* or dreg_lo,dreg_hi */
+				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+			}
 			goto emit_cond_jmp;
 		}
 		case BPF_JMP | BPF_JEQ | BPF_K:
@@ -2163,29 +2197,44 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		case BPF_JMP | BPF_JSGT | BPF_K:
 		case BPF_JMP | BPF_JSLE | BPF_K:
 		case BPF_JMP | BPF_JSLT | BPF_K:
-		case BPF_JMP | BPF_JSGE | BPF_K: {
-			u32 hi;
+		case BPF_JMP | BPF_JSGE | BPF_K:
+		case BPF_JMP32 | BPF_JEQ | BPF_K:
+		case BPF_JMP32 | BPF_JNE | BPF_K:
+		case BPF_JMP32 | BPF_JGT | BPF_K:
+		case BPF_JMP32 | BPF_JLT | BPF_K:
+		case BPF_JMP32 | BPF_JGE | BPF_K:
+		case BPF_JMP32 | BPF_JLE | BPF_K:
+		case BPF_JMP32 | BPF_JSGT | BPF_K:
+		case BPF_JMP32 | BPF_JSLE | BPF_K:
+		case BPF_JMP32 | BPF_JSLT | BPF_K:
+		case BPF_JMP32 | BPF_JSGE | BPF_K: {
+			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 			u8 sreg_lo = IA32_ECX;
 			u8 sreg_hi = IA32_EBX;
+			u32 hi;
 
 			if (dstk) {
 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 				      STACK_VAR(dst_lo));
-				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
-				      STACK_VAR(dst_hi));
+				if (is_jmp64)
+					EMIT3(0x8B,
+					      add_2reg(0x40, IA32_EBP,
+						       IA32_EDX),
+					      STACK_VAR(dst_hi));
 			}
 
-			hi = imm32 & (1<<31) ? (u32)~0 : 0;
 			/* mov ecx,imm32 */
 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
-			/* mov ebx,imm32 */
-			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
-
-			/* cmp dreg_hi,sreg_hi */
-			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
-			EMIT2(IA32_JNE, 2);
+			if (is_jmp64) {
+				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
+				/* mov ebx,imm32 */
+				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+				/* cmp dreg_hi,sreg_hi */
+				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+				EMIT2(IA32_JNE, 2);
+			}
 			/* cmp dreg_lo,sreg_lo */
 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
 
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 11/16] arm64: bpf: implement jitting of JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (9 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 10/16] x32: " Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 12/16] arm: " Jiong Wang
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang, Zi Shen Lim
This patch implements code-gen for new JMP32 instructions on arm64.
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 arch/arm64/net/bpf_jit_comp.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 1542df0..aaddc02 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -362,7 +362,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
 	const int i = insn - ctx->prog->insnsi;
-	const bool is64 = BPF_CLASS(code) == BPF_ALU64;
+	const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
+			  BPF_CLASS(code) == BPF_JMP;
 	const bool isdw = BPF_SIZE(code) == BPF_DW;
 	u8 jmp_cond;
 	s32 jmp_offset;
@@ -559,7 +560,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	case BPF_JMP | BPF_JSLT | BPF_X:
 	case BPF_JMP | BPF_JSGE | BPF_X:
 	case BPF_JMP | BPF_JSLE | BPF_X:
-		emit(A64_CMP(1, dst, src), ctx);
+	case BPF_JMP32 | BPF_JEQ | BPF_X:
+	case BPF_JMP32 | BPF_JGT | BPF_X:
+	case BPF_JMP32 | BPF_JLT | BPF_X:
+	case BPF_JMP32 | BPF_JGE | BPF_X:
+	case BPF_JMP32 | BPF_JLE | BPF_X:
+	case BPF_JMP32 | BPF_JNE | BPF_X:
+	case BPF_JMP32 | BPF_JSGT | BPF_X:
+	case BPF_JMP32 | BPF_JSLT | BPF_X:
+	case BPF_JMP32 | BPF_JSGE | BPF_X:
+	case BPF_JMP32 | BPF_JSLE | BPF_X:
+		emit(A64_CMP(is64, dst, src), ctx);
 emit_cond_jmp:
 		jmp_offset = bpf2a64_offset(i + off, i, ctx);
 		check_imm19(jmp_offset);
@@ -601,7 +612,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		emit(A64_B_(jmp_cond, jmp_offset), ctx);
 		break;
 	case BPF_JMP | BPF_JSET | BPF_X:
-		emit(A64_TST(1, dst, src), ctx);
+	case BPF_JMP32 | BPF_JSET | BPF_X:
+		emit(A64_TST(is64, dst, src), ctx);
 		goto emit_cond_jmp;
 	/* IF (dst COND imm) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_K:
@@ -614,12 +626,23 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	case BPF_JMP | BPF_JSLT | BPF_K:
 	case BPF_JMP | BPF_JSGE | BPF_K:
 	case BPF_JMP | BPF_JSLE | BPF_K:
-		emit_a64_mov_i(1, tmp, imm, ctx);
-		emit(A64_CMP(1, dst, tmp), ctx);
+	case BPF_JMP32 | BPF_JEQ | BPF_K:
+	case BPF_JMP32 | BPF_JGT | BPF_K:
+	case BPF_JMP32 | BPF_JLT | BPF_K:
+	case BPF_JMP32 | BPF_JGE | BPF_K:
+	case BPF_JMP32 | BPF_JLE | BPF_K:
+	case BPF_JMP32 | BPF_JNE | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_K:
+		emit_a64_mov_i(is64, tmp, imm, ctx);
+		emit(A64_CMP(is64, dst, tmp), ctx);
 		goto emit_cond_jmp;
 	case BPF_JMP | BPF_JSET | BPF_K:
-		emit_a64_mov_i(1, tmp, imm, ctx);
-		emit(A64_TST(1, dst, tmp), ctx);
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		emit_a64_mov_i(is64, tmp, imm, ctx);
+		emit(A64_TST(is64, dst, tmp), ctx);
 		goto emit_cond_jmp;
 	/* function call */
 	case BPF_JMP | BPF_CALL:
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 12/16] arm: bpf: implement jitting of JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (10 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 11/16] arm64: " Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 13/16] ppc: " Jiong Wang
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang, Shubham Bansal
This patch implements code-gen for new JMP32 instructions on arm.
For JSET, "ands" (AND with flags updated) is used, so corresponding
encoding helper is added.
Cc: Shubham Bansal <illusionist.neo@gmail.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 arch/arm/net/bpf_jit_32.c | 53 +++++++++++++++++++++++++++++++++++++++--------
 arch/arm/net/bpf_jit_32.h |  2 ++
 2 files changed, 46 insertions(+), 9 deletions(-)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 25b3ee8..c8bfbbf 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1083,12 +1083,17 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
 
 /* Arithmatic Operation */
 static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
-			     const u8 rn, struct jit_ctx *ctx, u8 op) {
+			     const u8 rn, struct jit_ctx *ctx, u8 op,
+			     bool is_jmp64) {
 	switch (op) {
 	case BPF_JSET:
-		emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
-		emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
-		emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
+		if (is_jmp64) {
+			emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
+			emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
+			emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
+		} else {
+			emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx);
+		}
 		break;
 	case BPF_JEQ:
 	case BPF_JNE:
@@ -1096,18 +1101,25 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
 	case BPF_JGE:
 	case BPF_JLE:
 	case BPF_JLT:
-		emit(ARM_CMP_R(rd, rm), ctx);
-		_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
+		if (is_jmp64) {
+			emit(ARM_CMP_R(rd, rm), ctx);
+			/* Only compare low halve if high halve are equal. */
+			_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
+		} else {
+			emit(ARM_CMP_R(rt, rn), ctx);
+		}
 		break;
 	case BPF_JSLE:
 	case BPF_JSGT:
 		emit(ARM_CMP_R(rn, rt), ctx);
-		emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
+		if (is_jmp64)
+			emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
 		break;
 	case BPF_JSLT:
 	case BPF_JSGE:
 		emit(ARM_CMP_R(rt, rn), ctx);
-		emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
+		if (is_jmp64)
+			emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
 		break;
 	}
 }
@@ -1615,6 +1627,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_JMP | BPF_JLT | BPF_X:
 	case BPF_JMP | BPF_JSLT | BPF_X:
 	case BPF_JMP | BPF_JSLE | BPF_X:
+	case BPF_JMP32 | BPF_JEQ | BPF_X:
+	case BPF_JMP32 | BPF_JGT | BPF_X:
+	case BPF_JMP32 | BPF_JGE | BPF_X:
+	case BPF_JMP32 | BPF_JNE | BPF_X:
+	case BPF_JMP32 | BPF_JSGT | BPF_X:
+	case BPF_JMP32 | BPF_JSGE | BPF_X:
+	case BPF_JMP32 | BPF_JSET | BPF_X:
+	case BPF_JMP32 | BPF_JLE | BPF_X:
+	case BPF_JMP32 | BPF_JLT | BPF_X:
+	case BPF_JMP32 | BPF_JSLT | BPF_X:
+	case BPF_JMP32 | BPF_JSLE | BPF_X:
 		/* Setup source registers */
 		rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx);
 		rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
@@ -1641,6 +1664,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_JMP | BPF_JLE | BPF_K:
 	case BPF_JMP | BPF_JSLT | BPF_K:
 	case BPF_JMP | BPF_JSLE | BPF_K:
+	case BPF_JMP32 | BPF_JEQ | BPF_K:
+	case BPF_JMP32 | BPF_JGT | BPF_K:
+	case BPF_JMP32 | BPF_JGE | BPF_K:
+	case BPF_JMP32 | BPF_JNE | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+	case BPF_JMP32 | BPF_JLT | BPF_K:
+	case BPF_JMP32 | BPF_JLE | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_K:
 		if (off == 0)
 			break;
 		rm = tmp2[0];
@@ -1652,7 +1686,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		rd = arm_bpf_get_reg64(dst, tmp, ctx);
 
 		/* Check for the condition */
-		emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code));
+		emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code),
+			  BPF_CLASS(code) == BPF_JMP);
 
 		/* Setup JUMP instruction */
 		jmp_offset = bpf2a32_offset(i+off, i, ctx);
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index f4e58bc..13a05f7 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -62,6 +62,7 @@
 #define ARM_INST_ADDS_I		0x02900000
 
 #define ARM_INST_AND_R		0x00000000
+#define ARM_INST_ANDS_R		0x00100000
 #define ARM_INST_AND_I		0x02000000
 
 #define ARM_INST_BIC_R		0x01c00000
@@ -172,6 +173,7 @@
 #define ARM_ADC_I(rd, rn, imm)	_AL3_I(ARM_INST_ADC, rd, rn, imm)
 
 #define ARM_AND_R(rd, rn, rm)	_AL3_R(ARM_INST_AND, rd, rn, rm)
+#define ARM_ANDS_R(rd, rn, rm)	_AL3_R(ARM_INST_ANDS, rd, rn, rm)
 #define ARM_AND_I(rd, rn, imm)	_AL3_I(ARM_INST_AND, rd, rn, imm)
 
 #define ARM_BIC_R(rd, rn, rm)	_AL3_R(ARM_INST_BIC, rd, rn, rm)
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 13/16] ppc: bpf: implement jitting of JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (11 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 12/16] arm: " Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-24  4:58   ` Sandipan Das
  2019-01-21 13:15 ` [PATCH bpf-next v2 14/16] s390: " Jiong Wang
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang, Naveen N . Rao, Sandipan Das
This patch implements code-gen for new JMP32 instructions on ppc.
For JMP32 | JSET, instruction encoding for PPC_RLWINM_DOT is added to check
the result of ANDing low 32-bit of operands.
Cc: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
Cc: Sandipan Das <sandipan@linux.ibm.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 arch/powerpc/include/asm/ppc-opcode.h |  1 +
 arch/powerpc/net/bpf_jit.h            |  4 ++
 arch/powerpc/net/bpf_jit_comp64.c     | 98 ++++++++++++++++++++++++++++++-----
 3 files changed, 89 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 19a8834..f9513ad 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -337,6 +337,7 @@
 #define PPC_INST_DIVWU			0x7c000396
 #define PPC_INST_DIVD			0x7c0003d2
 #define PPC_INST_RLWINM			0x54000000
+#define PPC_INST_RLWINM_DOT		0x54000001
 #define PPC_INST_RLWIMI			0x50000000
 #define PPC_INST_RLDICL			0x78000000
 #define PPC_INST_RLDICR			0x78000004
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index c2d5192..549e949 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -165,6 +165,10 @@
 #define PPC_RLWINM(d, a, i, mb, me)	EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
 					___PPC_RS(a) | __PPC_SH(i) |	      \
 					__PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RLWINM_DOT(d, a, i, mb, me)	EMIT(PPC_INST_RLWINM_DOT |	      \
+					___PPC_RA(d) | ___PPC_RS(a) |	      \
+					__PPC_SH(i) | __PPC_MB(mb) |	      \
+					__PPC_ME(me))
 #define PPC_RLWIMI(d, a, i, mb, me)	EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \
 					___PPC_RS(a) | __PPC_SH(i) |	      \
 					__PPC_MB(mb) | __PPC_ME(me))
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 7ce57657..2e40c2b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -809,18 +809,44 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			case BPF_JMP | BPF_JLE | BPF_X:
 			case BPF_JMP | BPF_JEQ | BPF_X:
 			case BPF_JMP | BPF_JNE | BPF_X:
+			case BPF_JMP32 | BPF_JGT | BPF_X:
+			case BPF_JMP32 | BPF_JLT | BPF_X:
+			case BPF_JMP32 | BPF_JGE | BPF_X:
+			case BPF_JMP32 | BPF_JLE | BPF_X:
+			case BPF_JMP32 | BPF_JEQ | BPF_X:
+			case BPF_JMP32 | BPF_JNE | BPF_X:
 				/* unsigned comparison */
-				PPC_CMPLD(dst_reg, src_reg);
+				if (BPF_CLASS(code) == BPF_JMP32)
+					PPC_CMPLW(dst_reg, src_reg);
+				else
+					PPC_CMPLD(dst_reg, src_reg);
 				break;
 			case BPF_JMP | BPF_JSGT | BPF_X:
 			case BPF_JMP | BPF_JSLT | BPF_X:
 			case BPF_JMP | BPF_JSGE | BPF_X:
 			case BPF_JMP | BPF_JSLE | BPF_X:
+			case BPF_JMP32 | BPF_JSGT | BPF_X:
+			case BPF_JMP32 | BPF_JSLT | BPF_X:
+			case BPF_JMP32 | BPF_JSGE | BPF_X:
+			case BPF_JMP32 | BPF_JSLE | BPF_X:
 				/* signed comparison */
-				PPC_CMPD(dst_reg, src_reg);
+				if (BPF_CLASS(code) == BPF_JMP32)
+					PPC_CMPW(dst_reg, src_reg);
+				else
+					PPC_CMPD(dst_reg, src_reg);
 				break;
 			case BPF_JMP | BPF_JSET | BPF_X:
-				PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg);
+			case BPF_JMP32 | BPF_JSET | BPF_X:
+				if (BPF_CLASS(code) == BPF_JMP) {
+					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
+						    src_reg);
+				} else {
+					int tmp_reg = b2p[TMP_REG_1];
+
+					PPC_AND(tmp_reg, dst_reg, src_reg);
+					PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
+						       31);
+				}
 				break;
 			case BPF_JMP | BPF_JNE | BPF_K:
 			case BPF_JMP | BPF_JEQ | BPF_K:
@@ -828,43 +854,87 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			case BPF_JMP | BPF_JLT | BPF_K:
 			case BPF_JMP | BPF_JGE | BPF_K:
 			case BPF_JMP | BPF_JLE | BPF_K:
+			case BPF_JMP32 | BPF_JNE | BPF_K:
+			case BPF_JMP32 | BPF_JEQ | BPF_K:
+			case BPF_JMP32 | BPF_JGT | BPF_K:
+			case BPF_JMP32 | BPF_JLT | BPF_K:
+			case BPF_JMP32 | BPF_JGE | BPF_K:
+			case BPF_JMP32 | BPF_JLE | BPF_K:
+			{
+				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
+
 				/*
 				 * Need sign-extended load, so only positive
 				 * values can be used as imm in cmpldi
 				 */
-				if (imm >= 0 && imm < 32768)
-					PPC_CMPLDI(dst_reg, imm);
-				else {
+				if (imm >= 0 && imm < 32768) {
+					if (is_jmp32)
+						PPC_CMPLWI(dst_reg, imm);
+					else
+						PPC_CMPLDI(dst_reg, imm);
+				} else {
 					/* sign-extending load */
 					PPC_LI32(b2p[TMP_REG_1], imm);
 					/* ... but unsigned comparison */
-					PPC_CMPLD(dst_reg, b2p[TMP_REG_1]);
+					if (is_jmp32)
+						PPC_CMPLW(dst_reg,
+							  b2p[TMP_REG_1]);
+					else
+						PPC_CMPLD(dst_reg,
+							  b2p[TMP_REG_1]);
 				}
 				break;
+			}
 			case BPF_JMP | BPF_JSGT | BPF_K:
 			case BPF_JMP | BPF_JSLT | BPF_K:
 			case BPF_JMP | BPF_JSGE | BPF_K:
 			case BPF_JMP | BPF_JSLE | BPF_K:
+			case BPF_JMP32 | BPF_JSGT | BPF_K:
+			case BPF_JMP32 | BPF_JSLT | BPF_K:
+			case BPF_JMP32 | BPF_JSGE | BPF_K:
+			case BPF_JMP32 | BPF_JSLE | BPF_K:
+			{
+				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
+
 				/*
 				 * signed comparison, so any 16-bit value
 				 * can be used in cmpdi
 				 */
-				if (imm >= -32768 && imm < 32768)
-					PPC_CMPDI(dst_reg, imm);
-				else {
+				if (imm >= -32768 && imm < 32768) {
+					if (is_jmp32)
+						PPC_CMPWI(dst_reg, imm);
+					else
+						PPC_CMPDI(dst_reg, imm);
+				} else {
 					PPC_LI32(b2p[TMP_REG_1], imm);
-					PPC_CMPD(dst_reg, b2p[TMP_REG_1]);
+					if (is_jmp32)
+						PPC_CMPW(dst_reg,
+							 b2p[TMP_REG_1]);
+					else
+						PPC_CMPD(dst_reg,
+							 b2p[TMP_REG_1]);
 				}
 				break;
+			}
 			case BPF_JMP | BPF_JSET | BPF_K:
+			case BPF_JMP32 | BPF_JSET | BPF_K:
 				/* andi does not sign-extend the immediate */
 				if (imm >= 0 && imm < 32768)
 					/* PPC_ANDI is _only/always_ dot-form */
 					PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
 				else {
-					PPC_LI32(b2p[TMP_REG_1], imm);
-					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
-						    b2p[TMP_REG_1]);
+					int tmp_reg = b2p[TMP_REG_1];
+
+					PPC_LI32(tmp_reg, imm);
+					if (BPF_CLASS(code) == BPF_JMP) {
+						PPC_AND_DOT(tmp_reg, dst_reg,
+							    tmp_reg);
+					} else {
+						PPC_AND(tmp_reg, dst_reg,
+							tmp_reg);
+						PPC_RLWINM_DOT(tmp_reg, tmp_reg,
+							       0, 0, 31);
+					}
 				}
 				break;
 			}
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * Re: [PATCH bpf-next v2 13/16] ppc: bpf: implement jitting of JMP32
  2019-01-21 13:15 ` [PATCH bpf-next v2 13/16] ppc: " Jiong Wang
@ 2019-01-24  4:58   ` Sandipan Das
  2019-01-24  9:53     ` Jiong Wang
  0 siblings, 1 reply; 20+ messages in thread
From: Sandipan Das @ 2019-01-24  4:58 UTC (permalink / raw)
  To: Jiong Wang; +Cc: ast, daniel, netdev, oss-drivers, Naveen N . Rao
Hi Jiong,
On 21/01/19 6:45 PM, Jiong Wang wrote:
> This patch implements code-gen for new JMP32 instructions on ppc.
> 
> For JMP32 | JSET, instruction encoding for PPC_RLWINM_DOT is added to check
> the result of ANDing low 32-bit of operands.
> 
> Cc: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
> Cc: Sandipan Das <sandipan@linux.ibm.com>
> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
> ---
>  arch/powerpc/include/asm/ppc-opcode.h |  1 +
>  arch/powerpc/net/bpf_jit.h            |  4 ++
>  arch/powerpc/net/bpf_jit_comp64.c     | 98 ++++++++++++++++++++++++++++++-----
>  3 files changed, 89 insertions(+), 14 deletions(-)
> [...]
I ran the verifier selftests on a ppc64 test system and found that the jmp32 tests
were failing because the instructions were not being decoded by the JIT compiler.
The codegen logic looks good to me and the tests passed after I fixed the decoding
logic. Here are the changes that I had to make:
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 2e40c2b251ba..15bba765fa79 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -768,36 +768,58 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
                case BPF_JMP | BPF_JGT | BPF_X:
                case BPF_JMP | BPF_JSGT | BPF_K:
                case BPF_JMP | BPF_JSGT | BPF_X:
+               case BPF_JMP32 | BPF_JGT | BPF_K:
+               case BPF_JMP32 | BPF_JGT | BPF_X:
+               case BPF_JMP32 | BPF_JSGT | BPF_K:
+               case BPF_JMP32 | BPF_JSGT | BPF_X:
                        true_cond = COND_GT;
                        goto cond_branch;
                case BPF_JMP | BPF_JLT | BPF_K:
                case BPF_JMP | BPF_JLT | BPF_X:
                case BPF_JMP | BPF_JSLT | BPF_K:
                case BPF_JMP | BPF_JSLT | BPF_X:
+               case BPF_JMP32 | BPF_JLT | BPF_K:
+               case BPF_JMP32 | BPF_JLT | BPF_X:
+               case BPF_JMP32 | BPF_JSLT | BPF_K:
+               case BPF_JMP32 | BPF_JSLT | BPF_X:
                        true_cond = COND_LT;
                        goto cond_branch;
                case BPF_JMP | BPF_JGE | BPF_K:
                case BPF_JMP | BPF_JGE | BPF_X:
                case BPF_JMP | BPF_JSGE | BPF_K:
                case BPF_JMP | BPF_JSGE | BPF_X:
+               case BPF_JMP32 | BPF_JGE | BPF_K:
+               case BPF_JMP32 | BPF_JGE | BPF_X:
+               case BPF_JMP32 | BPF_JSGE | BPF_K:
+               case BPF_JMP32 | BPF_JSGE | BPF_X:
                        true_cond = COND_GE;
                        goto cond_branch;
                case BPF_JMP | BPF_JLE | BPF_K:
                case BPF_JMP | BPF_JLE | BPF_X:
                case BPF_JMP | BPF_JSLE | BPF_K:
                case BPF_JMP | BPF_JSLE | BPF_X:
+               case BPF_JMP32 | BPF_JLE | BPF_K:
+               case BPF_JMP32 | BPF_JLE | BPF_X:
+               case BPF_JMP32 | BPF_JSLE | BPF_K:
+               case BPF_JMP32 | BPF_JSLE | BPF_X:
                        true_cond = COND_LE;
                        goto cond_branch;
                case BPF_JMP | BPF_JEQ | BPF_K:
                case BPF_JMP | BPF_JEQ | BPF_X:
+               case BPF_JMP32 | BPF_JEQ | BPF_K:
+               case BPF_JMP32 | BPF_JEQ | BPF_X:
                        true_cond = COND_EQ;
                        goto cond_branch;
                case BPF_JMP | BPF_JNE | BPF_K:
                case BPF_JMP | BPF_JNE | BPF_X:
+               case BPF_JMP32 | BPF_JNE | BPF_K:
+               case BPF_JMP32 | BPF_JNE | BPF_X:
                        true_cond = COND_NE;
                        goto cond_branch;
                case BPF_JMP | BPF_JSET | BPF_K:
                case BPF_JMP | BPF_JSET | BPF_X:
+               case BPF_JMP32 | BPF_JSET | BPF_K:
+               case BPF_JMP32 | BPF_JSET | BPF_X:
                        true_cond = COND_NE;
                        /* Fall through */
 
--
With Regards,
Sandipan
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * Re: [PATCH bpf-next v2 13/16] ppc: bpf: implement jitting of JMP32
  2019-01-24  4:58   ` Sandipan Das
@ 2019-01-24  9:53     ` Jiong Wang
  0 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-24  9:53 UTC (permalink / raw)
  To: Sandipan Das; +Cc: Jiong Wang, ast, daniel, netdev, oss-drivers, Naveen N . Rao
Sandipan Das writes:
> Hi Jiong,
>
> On 21/01/19 6:45 PM, Jiong Wang wrote:
>> This patch implements code-gen for new JMP32 instructions on ppc.
>> 
>> For JMP32 | JSET, instruction encoding for PPC_RLWINM_DOT is added to check
>> the result of ANDing low 32-bit of operands.
>> 
>> Cc: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
>> Cc: Sandipan Das <sandipan@linux.ibm.com>
>> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
>> ---
>>  arch/powerpc/include/asm/ppc-opcode.h |  1 +
>>  arch/powerpc/net/bpf_jit.h            |  4 ++
>>  arch/powerpc/net/bpf_jit_comp64.c     | 98 ++++++++++++++++++++++++++++++-----
>>  3 files changed, 89 insertions(+), 14 deletions(-)
>> [...]
>
> I ran the verifier selftests on a ppc64 test system and found that the jmp32 tests
> were failing because the instructions were not being decoded by the JIT compiler.
> The codegen logic looks good to me and the tests passed after I fixed the decoding
> logic. Here are the changes that I had to make:
Ah, I missed the outer layer decoding logic.
Thanks for the review, test and fix. Will integrate the following in v3.
Regards,
Jiong
>
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 2e40c2b251ba..15bba765fa79 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -768,36 +768,58 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
>                 case BPF_JMP | BPF_JGT | BPF_X:
>                 case BPF_JMP | BPF_JSGT | BPF_K:
>                 case BPF_JMP | BPF_JSGT | BPF_X:
> +               case BPF_JMP32 | BPF_JGT | BPF_K:
> +               case BPF_JMP32 | BPF_JGT | BPF_X:
> +               case BPF_JMP32 | BPF_JSGT | BPF_K:
> +               case BPF_JMP32 | BPF_JSGT | BPF_X:
>                         true_cond = COND_GT;
>                         goto cond_branch;
>                 case BPF_JMP | BPF_JLT | BPF_K:
>                 case BPF_JMP | BPF_JLT | BPF_X:
>                 case BPF_JMP | BPF_JSLT | BPF_K:
>                 case BPF_JMP | BPF_JSLT | BPF_X:
> +               case BPF_JMP32 | BPF_JLT | BPF_K:
> +               case BPF_JMP32 | BPF_JLT | BPF_X:
> +               case BPF_JMP32 | BPF_JSLT | BPF_K:
> +               case BPF_JMP32 | BPF_JSLT | BPF_X:
>                         true_cond = COND_LT;
>                         goto cond_branch;
>                 case BPF_JMP | BPF_JGE | BPF_K:
>                 case BPF_JMP | BPF_JGE | BPF_X:
>                 case BPF_JMP | BPF_JSGE | BPF_K:
>                 case BPF_JMP | BPF_JSGE | BPF_X:
> +               case BPF_JMP32 | BPF_JGE | BPF_K:
> +               case BPF_JMP32 | BPF_JGE | BPF_X:
> +               case BPF_JMP32 | BPF_JSGE | BPF_K:
> +               case BPF_JMP32 | BPF_JSGE | BPF_X:
>                         true_cond = COND_GE;
>                         goto cond_branch;
>                 case BPF_JMP | BPF_JLE | BPF_K:
>                 case BPF_JMP | BPF_JLE | BPF_X:
>                 case BPF_JMP | BPF_JSLE | BPF_K:
>                 case BPF_JMP | BPF_JSLE | BPF_X:
> +               case BPF_JMP32 | BPF_JLE | BPF_K:
> +               case BPF_JMP32 | BPF_JLE | BPF_X:
> +               case BPF_JMP32 | BPF_JSLE | BPF_K:
> +               case BPF_JMP32 | BPF_JSLE | BPF_X:
>                         true_cond = COND_LE;
>                         goto cond_branch;
>                 case BPF_JMP | BPF_JEQ | BPF_K:
>                 case BPF_JMP | BPF_JEQ | BPF_X:
> +               case BPF_JMP32 | BPF_JEQ | BPF_K:
> +               case BPF_JMP32 | BPF_JEQ | BPF_X:
>                         true_cond = COND_EQ;
>                         goto cond_branch;
>                 case BPF_JMP | BPF_JNE | BPF_K:
>                 case BPF_JMP | BPF_JNE | BPF_X:
> +               case BPF_JMP32 | BPF_JNE | BPF_K:
> +               case BPF_JMP32 | BPF_JNE | BPF_X:
>                         true_cond = COND_NE;
>                         goto cond_branch;
>                 case BPF_JMP | BPF_JSET | BPF_K:
>                 case BPF_JMP | BPF_JSET | BPF_X:
> +               case BPF_JMP32 | BPF_JSET | BPF_K:
> +               case BPF_JMP32 | BPF_JSET | BPF_X:
>                         true_cond = COND_NE;
>                         /* Fall through */
^ permalink raw reply	[flat|nested] 20+ messages in thread 
 
 
- * [PATCH bpf-next v2 14/16] s390: bpf: implement jitting of JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (12 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 13/16] ppc: " Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 15/16] nfp: " Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 16/16] selftests: bpf: makefile support sub-register code-gen test mode Jiong Wang
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel
  Cc: netdev, oss-drivers, Jiong Wang, Martin Schwidefsky,
	Heiko Carstens
This patch implements code-gen for new JMP32 instructions on s390.
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 arch/s390/net/bpf_jit_comp.c | 69 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 55 insertions(+), 14 deletions(-)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 3ff758e..aab4329 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1110,103 +1110,144 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		mask = 0xf000; /* j */
 		goto branch_oc;
 	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
+	case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */
 		mask = 0x2000; /* jh */
 		goto branch_ks;
 	case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
+	case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */
 		mask = 0x4000; /* jl */
 		goto branch_ks;
 	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
+	case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */
 		mask = 0xa000; /* jhe */
 		goto branch_ks;
 	case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
+	case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */
 		mask = 0xc000; /* jle */
 		goto branch_ks;
 	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
+	case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */
 		mask = 0x2000; /* jh */
 		goto branch_ku;
 	case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
+	case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */
 		mask = 0x4000; /* jl */
 		goto branch_ku;
 	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
+	case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */
 		mask = 0xa000; /* jhe */
 		goto branch_ku;
 	case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
+	case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */
 		mask = 0xc000; /* jle */
 		goto branch_ku;
 	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
+	case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */
 		mask = 0x7000; /* jne */
 		goto branch_ku;
 	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
+	case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */
 		mask = 0x8000; /* je */
 		goto branch_ku;
 	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
+	case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */
 		mask = 0x7000; /* jnz */
-		/* lgfi %w1,imm (load sign extend imm) */
-		EMIT6_IMM(0xc0010000, REG_W1, imm);
-		/* ngr %w1,%dst */
-		EMIT4(0xb9800000, REG_W1, dst_reg);
+		if (BPF_CLASS(insn->code) == BPF_JMP32) {
+			/* llilf %w1,imm (load zero extend imm) */
+			EMIT6_IMM(0xc0010000, REG_W1, imm);
+			/* nr %w1,%dst */
+			EMIT2(0x1400, REG_W1, dst_reg);
+		} else {
+			/* lgfi %w1,imm (load sign extend imm) */
+			EMIT6_IMM(0xc0010000, REG_W1, imm);
+			/* ngr %w1,%dst */
+			EMIT4(0xb9800000, REG_W1, dst_reg);
+		}
 		goto branch_oc;
 
 	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
+	case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */
 		mask = 0x2000; /* jh */
 		goto branch_xs;
 	case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
+	case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */
 		mask = 0x4000; /* jl */
 		goto branch_xs;
 	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
+	case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */
 		mask = 0xa000; /* jhe */
 		goto branch_xs;
 	case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
+	case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */
 		mask = 0xc000; /* jle */
 		goto branch_xs;
 	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
+	case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */
 		mask = 0x2000; /* jh */
 		goto branch_xu;
 	case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
+	case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */
 		mask = 0x4000; /* jl */
 		goto branch_xu;
 	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
+	case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */
 		mask = 0xa000; /* jhe */
 		goto branch_xu;
 	case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
+	case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */
 		mask = 0xc000; /* jle */
 		goto branch_xu;
 	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
+	case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */
 		mask = 0x7000; /* jne */
 		goto branch_xu;
 	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
+	case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */
 		mask = 0x8000; /* je */
 		goto branch_xu;
 	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
+	case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */
+	{
+		bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
+
 		mask = 0x7000; /* jnz */
-		/* ngrk %w1,%dst,%src */
-		EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
+		if (is_jmp32)
+			/* nrk %w1,%dst,%src */
+			EMIT4_RRF(0xb9f40000, REG_W1, dst_reg, src_reg);
+		else
+			/* ngrk %w1,%dst,%src */
+			EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
 		goto branch_oc;
 branch_ks:
 		/* lgfi %w1,imm (load sign extend imm) */
 		EMIT6_IMM(0xc0010000, REG_W1, imm);
-		/* cgrj %dst,%w1,mask,off */
-		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
+		/* cgrj or crj %dst,%w1,mask,off */
+		EMIT6_PCREL(0xec000000, is_jmp32 ? 0x0076 : 0x0064,
+			    dst_reg, REG_W1, i, off, mask);
 		break;
 branch_ku:
 		/* lgfi %w1,imm (load sign extend imm) */
 		EMIT6_IMM(0xc0010000, REG_W1, imm);
-		/* clgrj %dst,%w1,mask,off */
-		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
+		/* clgrj or clrj %dst,%w1,mask,off */
+		EMIT6_PCREL(0xec000000, is_jmp32 ? 0x0077 : 0x0065,
+			    dst_reg, REG_W1, i, off, mask);
 		break;
 branch_xs:
-		/* cgrj %dst,%src,mask,off */
-		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
+		/* cgrj or crj %dst,%src,mask,off */
+		EMIT6_PCREL(0xec000000, is_jmp32 ? 0x0076 : 0x0064,
+			    dst_reg, src_reg, i, off, mask);
 		break;
 branch_xu:
-		/* clgrj %dst,%src,mask,off */
-		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
+		/* clgrj or clrj %dst,%src,mask,off */
+		EMIT6_PCREL(0xec000000, is_jmp32 ? 0x0077 : 0x0065,
+			    dst_reg, src_reg, i, off, mask);
 		break;
 branch_oc:
 		/* brc mask,jmp_off (branch instruction needs 4 bytes) */
 		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
 		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
 		break;
+	}
 	default: /* too complex, give up */
 		pr_err("Unknown opcode %02x\n", insn->code);
 		return -1;
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 15/16] nfp: bpf: implement jitting of JMP32
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (13 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 14/16] s390: " Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  2019-01-21 13:15 ` [PATCH bpf-next v2 16/16] selftests: bpf: makefile support sub-register code-gen test mode Jiong Wang
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
This patch implements code-gen for new JMP32 instructions on NFP.
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 97 ++++++++++++++++++++-------
 drivers/net/ethernet/netronome/nfp/bpf/main.h | 15 +++++
 2 files changed, 88 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index e23ca90..2eb7a79 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1334,8 +1334,9 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 
 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
 			 insn->src_reg * 2, br_mask, insn->off);
-	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
-			 insn->src_reg * 2 + 1, br_mask, insn->off);
+	if (is_mbpf_jmp64(meta))
+		wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
+				 insn->src_reg * 2 + 1, br_mask, insn->off);
 
 	return 0;
 }
@@ -1390,13 +1391,15 @@ static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	else
 		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
 
-	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
-	if (!code->swap)
-		emit_alu(nfp_prog, reg_none(),
-			 reg_a(reg + 1), carry_op, tmp_reg);
-	else
-		emit_alu(nfp_prog, reg_none(),
-			 tmp_reg, carry_op, reg_a(reg + 1));
+	if (is_mbpf_jmp64(meta)) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+		if (!code->swap)
+			emit_alu(nfp_prog, reg_none(),
+				 reg_a(reg + 1), carry_op, tmp_reg);
+		else
+			emit_alu(nfp_prog, reg_none(),
+				 tmp_reg, carry_op, reg_a(reg + 1));
+	}
 
 	emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
@@ -1423,8 +1426,9 @@ static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	}
 
 	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
-	emit_alu(nfp_prog, reg_none(),
-		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
+	if (is_mbpf_jmp64(meta))
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
 	emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
 	return 0;
@@ -3048,6 +3052,19 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	swreg tmp_reg;
+
+	tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+	return 0;
+}
+
 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
@@ -3061,9 +3078,10 @@ static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	/* Upper word of the mask can only be 0 or ~0 from sign extension,
 	 * so either ignore it or OR the whole thing in.
 	 */
-	if (imm >> 32)
+	if (is_mbpf_jmp64(meta) && imm >> 32) {
 		emit_alu(nfp_prog, reg_none(),
 			 reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog));
+	}
 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
 
 	return 0;
@@ -3073,11 +3091,16 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
+	bool is_jmp32 = is_mbpf_jmp32(meta);
 	swreg tmp_reg;
 
 	if (!imm) {
-		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
-			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
+		if (is_jmp32)
+			emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE,
+				 reg_b(insn->dst_reg * 2));
+		else
+			emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
+				 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
 		return 0;
 	}
@@ -3087,6 +3110,9 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
 
+	if (is_jmp32)
+		return 0;
+
 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
 	emit_alu(nfp_prog, reg_none(),
 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
@@ -3101,10 +3127,13 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
 		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
-	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
-		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
-	emit_alu(nfp_prog, reg_none(),
-		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
+	if (is_mbpf_jmp64(meta)) {
+		emit_alu(nfp_prog, imm_b(nfp_prog),
+			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR,
+			 reg_b(insn->src_reg * 2 + 1));
+		emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR,
+			 imm_b(nfp_prog));
+	}
 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
 
 	return 0;
@@ -3369,6 +3398,28 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_JMP | BPF_JSLE | BPF_X] =  cmp_reg,
 	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
 	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
+	[BPF_JMP32 | BPF_JEQ | BPF_K] =	jeq32_imm,
+	[BPF_JMP32 | BPF_JGT | BPF_K] =	cmp_imm,
+	[BPF_JMP32 | BPF_JGE | BPF_K] =	cmp_imm,
+	[BPF_JMP32 | BPF_JLT | BPF_K] =	cmp_imm,
+	[BPF_JMP32 | BPF_JLE | BPF_K] =	cmp_imm,
+	[BPF_JMP32 | BPF_JSGT | BPF_K] =cmp_imm,
+	[BPF_JMP32 | BPF_JSGE | BPF_K] =cmp_imm,
+	[BPF_JMP32 | BPF_JSLT | BPF_K] =cmp_imm,
+	[BPF_JMP32 | BPF_JSLE | BPF_K] =cmp_imm,
+	[BPF_JMP32 | BPF_JSET | BPF_K] =jset_imm,
+	[BPF_JMP32 | BPF_JNE | BPF_K] =	jne_imm,
+	[BPF_JMP32 | BPF_JEQ | BPF_X] =	jeq_reg,
+	[BPF_JMP32 | BPF_JGT | BPF_X] =	cmp_reg,
+	[BPF_JMP32 | BPF_JGE | BPF_X] =	cmp_reg,
+	[BPF_JMP32 | BPF_JLT | BPF_X] =	cmp_reg,
+	[BPF_JMP32 | BPF_JLE | BPF_X] =	cmp_reg,
+	[BPF_JMP32 | BPF_JSGT | BPF_X] =cmp_reg,
+	[BPF_JMP32 | BPF_JSGE | BPF_X] =cmp_reg,
+	[BPF_JMP32 | BPF_JSLT | BPF_X] =cmp_reg,
+	[BPF_JMP32 | BPF_JSLE | BPF_X] =cmp_reg,
+	[BPF_JMP32 | BPF_JSET | BPF_X] =jset_reg,
+	[BPF_JMP32 | BPF_JNE | BPF_X] =	jne_reg,
 	[BPF_JMP | BPF_CALL] =		call,
 	[BPF_JMP | BPF_EXIT] =		jmp_exit,
 };
@@ -3397,7 +3448,7 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
 	list_for_each_entry(meta, &nfp_prog->insns, l) {
 		if (meta->skip)
 			continue;
-		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
+		if (!is_mbpf_jmp(meta))
 			continue;
 		if (meta->insn.code == (BPF_JMP | BPF_EXIT) &&
 		    !nfp_is_main_function(meta))
@@ -3758,16 +3809,14 @@ static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
 		if (meta->skip)
 			continue;
 
-		if (BPF_CLASS(insn.code) != BPF_ALU &&
-		    BPF_CLASS(insn.code) != BPF_ALU64 &&
-		    BPF_CLASS(insn.code) != BPF_JMP)
+		if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta))
 			continue;
 		if (BPF_SRC(insn.code) != BPF_K)
 			continue;
 		if (insn.imm >= 0)
 			continue;
 
-		if (BPF_CLASS(insn.code) == BPF_JMP) {
+		if (is_mbpf_jmp(meta)) {
 			switch (BPF_OP(insn.code)) {
 			case BPF_JGE:
 			case BPF_JSGE:
@@ -4336,7 +4385,7 @@ void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
 		unsigned int dst_idx;
 		bool pseudo_call;
 
-		if (BPF_CLASS(code) != BPF_JMP)
+		if (!is_mbpf_jmp(meta))
 			continue;
 		if (BPF_OP(code) == BPF_EXIT)
 			continue;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 9412779..72c5c22 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -357,6 +357,21 @@ static inline bool is_mbpf_load(const struct nfp_insn_meta *meta)
 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM);
 }
 
+static inline bool is_mbpf_jmp32(const struct nfp_insn_meta *meta)
+{
+	return mbpf_class(meta) == BPF_JMP32;
+}
+
+static inline bool is_mbpf_jmp64(const struct nfp_insn_meta *meta)
+{
+	return mbpf_class(meta) == BPF_JMP;
+}
+
+static inline bool is_mbpf_jmp(const struct nfp_insn_meta *meta)
+{
+	return is_mbpf_jmp32(meta) || is_mbpf_jmp64(meta);
+}
+
 static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
 {
 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM);
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread
- * [PATCH bpf-next v2 16/16] selftests: bpf: makefile support sub-register code-gen test mode
  2019-01-21 13:15 [PATCH bpf-next v2 00/16] bpf: propose new jmp32 instructions Jiong Wang
                   ` (14 preceding siblings ...)
  2019-01-21 13:15 ` [PATCH bpf-next v2 15/16] nfp: " Jiong Wang
@ 2019-01-21 13:15 ` Jiong Wang
  15 siblings, 0 replies; 20+ messages in thread
From: Jiong Wang @ 2019-01-21 13:15 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, oss-drivers, Jiong Wang
This patch enables testing some eBPF programs under sub-register
compilation mode.
Only enable this when there is BPF_JMP32 support on both LLVM and kernel.
This is because only after BPF_JMP32 added, code-gen for complex program
under sub-register mode will be clean enough to pass verification.
This patch splits TEST_GEN_FILES into BPF_OBJ_FILES and
BPF_OBJ_FILES_DUAL_COMPILE. The latter are those objects we would like to
compile for both default and sub-register mode. They are also objects used
by "test_progs".
A "alu32" sub directory is created, all object files compiled under
sub-register mode are placed there, and a new TEST_CUSTOM_PROGS
"test_progs_32" is added, so it could be run automatically for run_tests.
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 tools/testing/selftests/bpf/Makefile | 93 ++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 24 deletions(-)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 70229de..859a644 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -10,15 +10,13 @@ ifneq ($(wildcard $(GENHDR)),)
   GENFLAGS := -DHAVE_GENHDR
 endif
 
+CLANG		?= clang
+LLC		?= llc
+LLVM_OBJCOPY	?= llvm-objcopy
+BTF_PAHOLE	?= pahole
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
 LDLIBS += -lcap -lelf -lrt -lpthread
 
-TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
-all: $(TEST_CUSTOM_PROGS)
-
-$(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
-	$(CC) -o $(TEST_CUSTOM_PROGS) -static $< -Wl,--build-id
-
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
@@ -26,21 +24,42 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user
 
-TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
-	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
-	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
-	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
-	test_tcpnotify_kern.o \
-	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
-	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
-	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
-	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
-	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
+BPF_OBJ_FILES = \
+	test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
+	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o \
+	test_tcpnotify_kern.o sample_map_ret0.o test_tcpbpf_kern.o \
+	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o \
+	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o \
+	test_tunnel_kern.o test_sockhash_kern.o test_lwt_seg6local.o \
+	sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
 	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
-	test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \
+	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \
 	xdp_dummy.o test_map_in_map.o
 
+# Objects are built with default compilation flags and with sub-register
+# code-gen enabled.
+BPF_OBJ_FILES_DUAL_COMPILE = \
+	test_pkt_access.o test_pkt_access.o test_xdp.o test_adjust_tail.o \
+	test_l4lb.o test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \
+	test_obj_id.o test_pkt_md_access.o test_tracepoint.o \
+	test_stacktrace_map.o test_stacktrace_map.o test_stacktrace_build_id.o \
+	test_stacktrace_build_id.o test_get_stack_rawtp.o \
+	test_get_stack_rawtp.o test_tracepoint.o test_sk_lookup_kern.o \
+	test_queue_map.o test_stack_map.o
+
+TEST_GEN_FILES = $(BPF_OBJ_FILES) $(BPF_OBJ_FILES_DUAL_COMPILE)
+
+# Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor
+# support which is the first version to contain both ALU32 and JMP32
+# instructions.
+SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \
+			$(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \
+			$(LLC) -mattr=+alu32 -mcpu=probe 2>&1 | \
+			grep 'if w')
+ifneq ($(SUBREG_CODEGEN),)
+TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES_DUAL_COMPILE))
+endif
+
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
 	test_libbpf.sh \
@@ -65,6 +84,13 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_use
 
 include ../lib.mk
 
+# NOTE: $(OUTPUT) won't get default value if used before lib.mk
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+all: $(TEST_CUSTOM_PROGS)
+
+$(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c
+	$(CC) -o $@ -static $< -Wl,--build-id
+
 BPFOBJ := $(OUTPUT)/libbpf.a
 
 $(TEST_GEN_PROGS): $(BPFOBJ)
@@ -92,11 +118,6 @@ force:
 $(BPFOBJ): force
 	$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
 
-CLANG ?= clang
-LLC   ?= llc
-LLVM_OBJCOPY ?= llvm-objcopy
-BTF_PAHOLE ?= pahole
-
 PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
 
 # Let newer LLVM versions transparently probe the kernel for availability
@@ -148,6 +169,30 @@ endif
 endif
 endif
 
+ifneq ($(SUBREG_CODEGEN),)
+ALU32_BUILD_DIR = $(OUTPUT)/alu32
+TEST_CUSTOM_PROGS += $(ALU32_BUILD_DIR)/test_progs_32
+$(ALU32_BUILD_DIR):
+	mkdir -p $@
+
+$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read
+	cp $< $@
+
+$(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(ALU32_BUILD_DIR) \
+						$(ALU32_BUILD_DIR)/urandom_read
+	$(CC) $(CFLAGS) -o $(ALU32_BUILD_DIR)/test_progs_32 $< \
+		trace_helpers.c $(OUTPUT)/libbpf.a $(LDLIBS)
+
+$(ALU32_BUILD_DIR)/%.o: %.c $(ALU32_BUILD_DIR) $(ALU32_BUILD_DIR)/test_progs_32
+	$(CLANG) $(CLANG_FLAGS) \
+		 -O2 -target bpf -emit-llvm -c $< -o - |      \
+	$(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \
+		-filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+	$(BTF_PAHOLE) -J $@
+endif
+endif
+
 # Have one program compiled without "-target bpf" to test whether libbpf loads
 # it successfully
 $(OUTPUT)/test_xdp.o: test_xdp.c
@@ -166,4 +211,4 @@ ifeq ($(DWARF2BTF),y)
 	$(BTF_PAHOLE) -J $@
 endif
 
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR)
-- 
2.7.4
^ permalink raw reply related	[flat|nested] 20+ messages in thread