Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH bpf-next 3/3] nfp: bpf: support arithmetic indirect right shift (BPF_ARSH | BPF_X)
From: Jakub Kicinski @ 2018-05-18 19:12 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jiong Wang
In-Reply-To: <20180518191211.18670-1-jakub.kicinski@netronome.com>

From: Jiong Wang <jiong.wang@netronome.com>

Code logic is similar to arithmetic right shift by constant, and NFP
gets the indirect shift amount through the source A operand of PREV_ALU.

It is possible to fall back to logical right shift if the MSB is known to be
zero from range info; however, there is no benefit in doing so, given that
logical indirect right shift uses an instruction sequence with the same
number of instructions and cycles.

Suppose the MSB of regX is the bit we want to replicate to fill in all the
vacant positions, and regY contains the shift amount; then we can use a
single instruction to set up both.

  [alu, --, regY, OR, regX]

  --
  NOTE: the PREV_ALU result doesn't need to be written to any destination
        register.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 99 ++++++++++++++++++--
 1 file changed, 89 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index f73242c4da2f..8a92088df0d7 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1919,29 +1919,26 @@ static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 /* Code logic is the same as __shr_imm64 except ashr requires signedness bit
  * told through PREV_ALU result.
  */
-static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
 {
-	const struct bpf_insn *insn = &meta->insn;
-	u8 dst = insn->dst_reg * 2;
-
-	if (insn->imm < 32) {
+	if (shift_amt < 32) {
 		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
-			 reg_b(dst), SHF_SC_R_DSHF, insn->imm);
+			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
 		/* Set signedness bit. */
 		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
 			 reg_imm(0));
 		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
-			 reg_b(dst + 1), SHF_SC_R_SHF, insn->imm);
-	} else if (insn->imm == 32) {
+			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
+	} else if (shift_amt == 32) {
 		/* NOTE: this also helps setting signedness bit. */
 		wrp_reg_mov(nfp_prog, dst, dst + 1);
 		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
 			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
-	} else if (insn->imm > 32) {
+	} else if (shift_amt > 32) {
 		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
 			 reg_imm(0));
 		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
-			 reg_b(dst + 1), SHF_SC_R_SHF, insn->imm - 32);
+			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
 		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
 			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
 	}
@@ -1949,6 +1946,87 @@ static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
+
+	return __ashr_imm64(nfp_prog, dst, insn->imm);
+}
+
+static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	/* NOTE: the first insn will set both indirect shift amount (source A)
+	 * and signedness bit (MSB of result).
+	 */
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
+	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+		       reg_b(dst + 1), SHF_SC_R_SHF);
+}
+
+static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	/* NOTE: it is the same as logic shift because we don't need to shift in
+	 * signedness bit when the shift amount is less than 32.
+	 */
+	return shr_reg64_lt32_low(nfp_prog, dst, src);
+}
+
+static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	ashr_reg64_lt32_low(nfp_prog, dst, src);
+	ashr_reg64_lt32_high(nfp_prog, dst, src);
+}
+
+static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
+		       reg_b(dst + 1), SHF_SC_R_SHF);
+	emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+		 reg_b(dst + 1), SHF_SC_R_SHF, 31);
+}
+
+/* Like ashr_imm64, but need to use indirect shift. */
+static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 umin, umax;
+	u8 dst, src;
+
+	dst = insn->dst_reg * 2;
+	umin = meta->umin;
+	umax = meta->umax;
+	if (umin == umax)
+		return __ashr_imm64(nfp_prog, dst, umin);
+
+	src = insn->src_reg * 2;
+	if (umax < 32) {
+		ashr_reg64_lt32(nfp_prog, dst, src);
+	} else if (umin >= 32) {
+		ashr_reg64_ge32(nfp_prog, dst, src);
+	} else {
+		u16 label_ge32, label_end;
+
+		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
+		ashr_reg64_lt32_low(nfp_prog, dst, src);
+		label_end = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br(nfp_prog, BR_UNC, label_end, 2);
+		/* ashr_reg64_lt32_high packed in delay slot. */
+		ashr_reg64_lt32_high(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
+			return -EINVAL;
+		ashr_reg64_ge32(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
@@ -2775,6 +2853,7 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
 	[BPF_ALU64 | BPF_RSH | BPF_X] =	shr_reg64,
 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
+	[BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
 	[BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
 	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
-- 
2.17.0

^ permalink raw reply related

* [PATCH bpf-next 2/3] nfp: bpf: support arithmetic right shift by constant (BPF_ARSH | BPF_K)
From: Jakub Kicinski @ 2018-05-18 19:12 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jiong Wang
In-Reply-To: <20180518191211.18670-1-jakub.kicinski@netronome.com>

From: Jiong Wang <jiong.wang@netronome.com>

Code logic is similar to logical right shift, except that we also need to
set the PREV_ALU result properly, the MSB of which is the bit that will be
replicated to fill in all the vacant positions.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 34 ++++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/nfp_asm.h |  1 +
 2 files changed, 35 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 4cff08771951..f73242c4da2f 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1916,6 +1916,39 @@ static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+/* Code logic is the same as __shr_imm64 except ashr requires signedness bit
+ * told through PREV_ALU result.
+ */
+static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
+
+	if (insn->imm < 32) {
+		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_R_DSHF, insn->imm);
+		/* Set signedness bit. */
+		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
+			 reg_imm(0));
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+			 reg_b(dst + 1), SHF_SC_R_SHF, insn->imm);
+	} else if (insn->imm == 32) {
+		/* NOTE: this also helps setting signedness bit. */
+		wrp_reg_mov(nfp_prog, dst, dst + 1);
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
+	} else if (insn->imm > 32) {
+		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
+			 reg_imm(0));
+		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
+			 reg_b(dst + 1), SHF_SC_R_SHF, insn->imm - 32);
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
+	}
+
+	return 0;
+}
+
 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
@@ -2742,6 +2775,7 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
 	[BPF_ALU64 | BPF_RSH | BPF_X] =	shr_reg64,
 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
+	[BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
 	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
 	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index fa826bd9c668..f6677bc9875a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -174,6 +174,7 @@ enum shf_op {
 	SHF_OP_NONE = 0,
 	SHF_OP_AND = 2,
 	SHF_OP_OR = 5,
+	SHF_OP_ASHR = 6,
 };
 
 enum shf_sc {
-- 
2.17.0

^ permalink raw reply related

* [PATCH bpf-next 1/3] nfp: bpf: support logic indirect shifts (BPF_[L|R]SH | BPF_X)
From: Jakub Kicinski @ 2018-05-18 19:12 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jiong Wang
In-Reply-To: <20180518191211.18670-1-jakub.kicinski@netronome.com>

From: Jiong Wang <jiong.wang@netronome.com>

For indirect shifts, the shift amount is not specified as a constant; NFP
needs to get the shift amount through the low 5 bits of the source A operand
in PREV_ALU, therefore extra instructions are needed compared with shifts by
constants.

Because NFP is 32-bit, we use a register pair for 64-bit shifts and
therefore need different instruction sequences depending on whether the
shift amount is less than 32 or not.

An NFP branch-on-bit-test instruction emitter is added by this patch and is
used for an efficient runtime check on the shift amount. We treat the shift
amount as less than 32 if bit 5 is clear, and as greater than or equal to 32
otherwise. A shift amount greater than or equal to 64 will result in
undefined behavior.

This patch also uses range info to avoid generating unnecessary runtime code
when we are certain whether the shift amount is less than 32 or not.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 299 ++++++++++++++++--
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  28 ++
 .../net/ethernet/netronome/nfp/bpf/offload.c  |   2 +
 .../net/ethernet/netronome/nfp/bpf/verifier.c |   8 +
 drivers/net/ethernet/netronome/nfp/nfp_asm.h  |  17 +-
 5 files changed, 322 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index a4d3da215863..4cff08771951 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -211,6 +211,60 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
 	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
 }
 
+static void
+__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
+	      bool set, bool src_lmextn)
+{
+	u16 addr_lo, addr_hi;
+	u64 insn;
+
+	addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
+	addr_hi = addr != addr_lo;
+
+	insn = OP_BR_BIT_BASE |
+		FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
+		FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
+		FIELD_PREP(OP_BR_BIT_BV, set) |
+		FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
+		FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
+		FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
+		FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
+		 u8 defer, bool set, enum nfp_relo_type relo)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	/* NOTE: The bit to test is specified as an rotation amount, such that
+	 *	 the bit to test will be placed on the MSB of the result when
+	 *	 doing a rotate right. For bit X, we need right rotate X + 1.
+	 */
+	bit += 1;
+
+	err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
+		      reg.src_lmextn);
+
+	nfp_prog->prog[nfp_prog->prog_len - 1] |=
+		FIELD_PREP(OP_RELO_TYPE, relo);
+}
+
+static void
+emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
+{
+	emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
+}
+
 static void
 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
 	     enum immed_width width, bool invert,
@@ -309,6 +363,19 @@ emit_shf(struct nfp_prog *nfp_prog, swreg dst,
 		   reg.dst_lmextn, reg.src_lmextn);
 }
 
+static void
+emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
+	       swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
+{
+	if (sc == SHF_SC_R_ROT) {
+		pr_err("indirect shift is not allowed on rotation\n");
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
+}
+
 static void
 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
 	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
@@ -1629,56 +1696,226 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
-static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	const struct bpf_insn *insn = &meta->insn;
-	u8 dst = insn->dst_reg * 2;
-
-	if (insn->imm < 32) {
-		emit_shf(nfp_prog, reg_both(dst + 1),
-			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
-			 SHF_SC_R_DSHF, 32 - insn->imm);
-		emit_shf(nfp_prog, reg_both(dst),
-			 reg_none(), SHF_OP_NONE, reg_b(dst),
-			 SHF_SC_L_SHF, insn->imm);
-	} else if (insn->imm == 32) {
+/* Pseudo code:
+ *   if shift_amt >= 32
+ *     dst_high = dst_low << shift_amt[4:0]
+ *     dst_low = 0;
+ *   else
+ *     dst_high = (dst_high, dst_low) >> (32 - shift_amt)
+ *     dst_low = dst_low << shift_amt
+ *
+ * The indirect shift will use the same logic at runtime.
+ */
+static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
+{
+	if (shift_amt < 32) {
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
+			 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
+			 32 - shift_amt);
+		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
+	} else if (shift_amt == 32) {
 		wrp_reg_mov(nfp_prog, dst + 1, dst);
 		wrp_immed(nfp_prog, reg_both(dst), 0);
-	} else if (insn->imm > 32) {
-		emit_shf(nfp_prog, reg_both(dst + 1),
-			 reg_none(), SHF_OP_NONE, reg_b(dst),
-			 SHF_SC_L_SHF, insn->imm - 32);
+	} else if (shift_amt > 32) {
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
 		wrp_immed(nfp_prog, reg_both(dst), 0);
 	}
 
 	return 0;
 }
 
-static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u8 dst = insn->dst_reg * 2;
 
-	if (insn->imm < 32) {
-		emit_shf(nfp_prog, reg_both(dst),
-			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
-			 SHF_SC_R_DSHF, insn->imm);
-		emit_shf(nfp_prog, reg_both(dst + 1),
-			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
-			 SHF_SC_R_SHF, insn->imm);
-	} else if (insn->imm == 32) {
+	return __shl_imm64(nfp_prog, dst, insn->imm);
+}
+
+static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
+		 reg_b(src));
+	emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
+		       reg_b(dst), SHF_SC_R_DSHF);
+}
+
+/* NOTE: for indirect left shift, HIGH part should be calculated first. */
+static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+		       reg_b(dst), SHF_SC_L_SHF);
+}
+
+static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	shl_reg64_lt32_high(nfp_prog, dst, src);
+	shl_reg64_lt32_low(nfp_prog, dst, src);
+}
+
+static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+		       reg_b(dst), SHF_SC_L_SHF);
+	wrp_immed(nfp_prog, reg_both(dst), 0);
+}
+
+static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 umin, umax;
+	u8 dst, src;
+
+	dst = insn->dst_reg * 2;
+	umin = meta->umin;
+	umax = meta->umax;
+	if (umin == umax)
+		return __shl_imm64(nfp_prog, dst, umin);
+
+	src = insn->src_reg * 2;
+	if (umax < 32) {
+		shl_reg64_lt32(nfp_prog, dst, src);
+	} else if (umin >= 32) {
+		shl_reg64_ge32(nfp_prog, dst, src);
+	} else {
+		/* Generate different instruction sequences depending on runtime
+		 * value of shift amount.
+		 */
+		u16 label_ge32, label_end;
+
+		label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
+		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
+
+		shl_reg64_lt32_high(nfp_prog, dst, src);
+		label_end = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br(nfp_prog, BR_UNC, label_end, 2);
+		/* shl_reg64_lt32_low packed in delay slot. */
+		shl_reg64_lt32_low(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
+			return -EINVAL;
+		shl_reg64_ge32(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Pseudo code:
+ *   if shift_amt >= 32
+ *     dst_high = 0;
+ *     dst_low = dst_high >> shift_amt[4:0]
+ *   else
+ *     dst_high = dst_high >> shift_amt
+ *     dst_low = (dst_high, dst_low) >> shift_amt
+ *
+ * The indirect shift will use the same logic at runtime.
+ */
+static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
+{
+	if (shift_amt < 32) {
+		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
+	} else if (shift_amt == 32) {
 		wrp_reg_mov(nfp_prog, dst, dst + 1);
 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
-	} else if (insn->imm > 32) {
-		emit_shf(nfp_prog, reg_both(dst),
-			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
-			 SHF_SC_R_SHF, insn->imm - 32);
+	} else if (shift_amt > 32) {
+		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
 	}
 
 	return 0;
 }
 
+static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
+
+	return __shr_imm64(nfp_prog, dst, insn->imm);
+}
+
+/* NOTE: for indirect right shift, LOW part should be calculated first. */
+static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+		       reg_b(dst + 1), SHF_SC_R_SHF);
+}
+
+static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
+		       reg_b(dst), SHF_SC_R_DSHF);
+}
+
+static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	shr_reg64_lt32_low(nfp_prog, dst, src);
+	shr_reg64_lt32_high(nfp_prog, dst, src);
+}
+
+static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+		       reg_b(dst + 1), SHF_SC_R_SHF);
+	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+}
+
+static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 umin, umax;
+	u8 dst, src;
+
+	dst = insn->dst_reg * 2;
+	umin = meta->umin;
+	umax = meta->umax;
+	if (umin == umax)
+		return __shr_imm64(nfp_prog, dst, umin);
+
+	src = insn->src_reg * 2;
+	if (umax < 32) {
+		shr_reg64_lt32(nfp_prog, dst, src);
+	} else if (umin >= 32) {
+		shr_reg64_ge32(nfp_prog, dst, src);
+	} else {
+		/* Generate different instruction sequences depending on runtime
+		 * value of shift amount.
+		 */
+		u16 label_ge32, label_end;
+
+		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
+		shr_reg64_lt32_low(nfp_prog, dst, src);
+		label_end = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br(nfp_prog, BR_UNC, label_end, 2);
+		/* shr_reg64_lt32_high packed in delay slot. */
+		shr_reg64_lt32_high(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
+			return -EINVAL;
+		shr_reg64_ge32(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
@@ -2501,7 +2738,9 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
 	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
 	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
+	[BPF_ALU64 | BPF_LSH | BPF_X] =	shl_reg64,
 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
+	[BPF_ALU64 | BPF_RSH | BPF_X] =	shr_reg64,
 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
 	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 8b143546ae85..654fe7823e5e 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -263,6 +263,8 @@ struct nfp_bpf_reg_state {
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
  * @arg2: arg2 for call instructions
+ * @umin: copy of core verifier umin_value.
+ * @umax: copy of core verifier umax_value.
  * @off: index of first generated machine instruction (in nfp_prog.prog)
  * @n: eBPF instruction number
  * @flags: eBPF instruction extra optimization flags
@@ -298,6 +300,13 @@ struct nfp_insn_meta {
 			struct bpf_reg_state arg1;
 			struct nfp_bpf_reg_state arg2;
 		};
+		/* We are interested in range info for some operands,
+		 * for example, the shift amount.
+		 */
+		struct {
+			u64 umin;
+			u64 umax;
+		};
 	};
 	unsigned int off;
 	unsigned short n;
@@ -375,6 +384,25 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
 }
 
+static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta)
+{
+	u8 code = meta->insn.code;
+	bool is_alu, is_shift;
+	u8 opclass, opcode;
+
+	opclass = BPF_CLASS(code);
+	is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU;
+	if (!is_alu)
+		return false;
+
+	opcode = BPF_OP(code);
+	is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH;
+	if (!is_shift)
+		return false;
+
+	return BPF_SRC(code) == BPF_X;
+}
+
 /**
  * struct nfp_prog - nfp BPF program
  * @bpf: backpointer to the bpf app priv structure
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 4db0ac1e42a8..7eae4c0266f8 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -190,6 +190,8 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
 
 		meta->insn = prog[i];
 		meta->n = i;
+		if (is_mbpf_indir_shift(meta))
+			meta->umin = U64_MAX;
 
 		list_add_tail(&meta->l, &nfp_prog->insns);
 	}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 844a9be6e55a..4bfeba7b21b2 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -551,6 +551,14 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 	if (is_mbpf_xadd(meta))
 		return nfp_bpf_check_xadd(nfp_prog, meta, env);
 
+	if (is_mbpf_indir_shift(meta)) {
+		const struct bpf_reg_state *sreg =
+			cur_regs(env) + meta->insn.src_reg;
+
+		meta->umin = min(meta->umin, sreg->umin_value);
+		meta->umax = max(meta->umax, sreg->umax_value);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index faa4e131c136..fa826bd9c668 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -72,8 +72,21 @@
 #define OP_BR_ADDR_LO		0x007ffc00000ULL
 #define OP_BR_ADDR_HI		0x10000000000ULL
 
-#define nfp_is_br(_insn)				\
-	(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
+#define OP_BR_BIT_BASE		0x0d000000000ULL
+#define OP_BR_BIT_BASE_MASK	0x0f800080300ULL
+#define OP_BR_BIT_A_SRC		0x000000000ffULL
+#define OP_BR_BIT_B_SRC		0x0000003fc00ULL
+#define OP_BR_BIT_BV		0x00000040000ULL
+#define OP_BR_BIT_SRC_LMEXTN	0x40000000000ULL
+#define OP_BR_BIT_DEFBR		OP_BR_DEFBR
+#define OP_BR_BIT_ADDR_LO	OP_BR_ADDR_LO
+#define OP_BR_BIT_ADDR_HI	OP_BR_ADDR_HI
+
+static inline bool nfp_is_br(u64 insn)
+{
+	return (insn & OP_BR_BASE_MASK) == OP_BR_BASE ||
+	       (insn & OP_BR_BIT_BASE_MASK) == OP_BR_BIT_BASE;
+}
 
 enum br_mask {
 	BR_BEQ = 0x00,
-- 
2.17.0

^ permalink raw reply related

* [PATCH bpf-next 0/3] nfp: bpf: complete shift supports on NFP JIT
From: Jakub Kicinski @ 2018-05-18 19:12 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski

Jiong says:

The NFP eBPF JIT is missing logical indirect shifts (both left and right) and
arithmetic right shift (both indirect shift and shift by constant).

This patch set adds support for them.

For indirect shifts, the shift amount is not specified as a constant; NFP
needs to get the shift amount through the low 5 bits of the source A operand
in PREV_ALU, therefore extra instructions are needed compared with shifts by
constants.

Because NFP is 32-bit, we use a register pair for 64-bit shifts and
therefore need different instruction sequences depending on whether the
shift amount is less than 32 or not.

An NFP branch-on-bit-test instruction emitter is added by this patch set and
is used for an efficient runtime check on the shift amount. We treat the shift
amount as less than 32 if bit 5 is clear, and as greater than or equal to 32
otherwise. A shift amount greater than or equal to 64 will result in
undefined behavior.

This patch set also uses range info to avoid generating unnecessary runtime
code when we are certain whether the shift amount is less than 32 or not.

Jiong Wang (3):
  nfp: bpf: support logic indirect shifts (BPF_[L|R]SH | BPF_X)
  nfp: bpf: support arithmetic right shift by constant (BPF_ARSH |
    BPF_K)
  nfp: bpf: support arithmetic indirect right shift (BPF_ARSH | BPF_X)

 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 410 ++++++++++++++++--
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  28 ++
 .../net/ethernet/netronome/nfp/bpf/offload.c  |   2 +
 .../net/ethernet/netronome/nfp/bpf/verifier.c |   8 +
 drivers/net/ethernet/netronome/nfp/nfp_asm.h  |  18 +-
 5 files changed, 435 insertions(+), 31 deletions(-)

-- 
2.17.0

^ permalink raw reply

* [PATCH RFC net-next 1/1] tcp: close socket without reset on incoming data
From: Debabrata Banerjee @ 2018-05-18 19:01 UTC (permalink / raw)
  To: David S . Miller, netdev; +Cc: Alexey Kuznetsov, Hideaki YOSHIFUJI, dbanerje
In-Reply-To: <20180518190141.899-1-dbanerje@akamai.com>

When TCP_CLOSE_NORST is set before a close(), offload sinking of
unwanted data to the kernel with low resource usage, with a timeout of
TCP_LINGER2. The socket will transition to FIN_WAIT1 and then FIN_WAIT2
where it will ack data until either the timeout is hit, or a RST or FIN
is received.

Signed-off-by: Debabrata Banerjee <dbanerje@akamai.com>
---
 include/linux/tcp.h      |  4 +++-
 include/uapi/linux/tcp.h |  2 +-
 net/ipv4/tcp.c           | 23 +++++++++++++++++++++--
 net/ipv4/tcp_input.c     | 16 ++++++++++++----
 net/ipv4/tcp_minisocks.c | 15 +++++++++++++++
 5 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 72705eaf4b84..bd44bc99b480 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -226,7 +226,8 @@ struct tcp_sock {
 		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
 		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
 		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
-		unused:2;
+		norst:1,	/* Don't send RST on shutdown() socket */
+		unused:1;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
@@ -429,6 +430,7 @@ struct tcp_timewait_sock {
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	  *tw_md5_key;
 #endif
+	int			  tw_norst;
 };
 
 static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 29eb659aa77a..369f3402b669 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -124,8 +124,8 @@ enum {
 #define TCP_FASTOPEN_NO_COOKIE	34	/* Enable TFO without a TFO cookie */
 #define TCP_ZEROCOPY_RECEIVE	35
 #define TCP_INQ			36	/* Notify bytes available to read as a cmsg on read */
-
 #define TCP_CM_INQ		TCP_INQ
+#define TCP_CLOSE_NORST		37	/* Don't send RST on close()'d socket */
 
 struct tcp_repair_opt {
 	__u32	opt_code;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0a2ea0bbf867..29fe763002e5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2318,8 +2318,10 @@ void tcp_close(struct sock *sk, long timeout)
 	struct sk_buff *skb;
 	int data_was_unread = 0;
 	int state;
+	struct tcp_sock *tp;
 
 	lock_sock(sk);
+	tp = tcp_sk(sk);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 
 	if (sk->sk_state == TCP_LISTEN) {
@@ -2362,8 +2364,19 @@ void tcp_close(struct sock *sk, long timeout)
 	} else if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
-		tcp_set_state(sk, TCP_CLOSE);
-		tcp_send_active_reset(sk, sk->sk_allocation);
+
+		if (unlikely(tp->norst)) {
+			if (tcp_close_state(sk)) {
+				/* We will discard all new incoming data
+				 * set window to max of current or init.
+				 */
+				tp->rcv_wnd = max(tp->rcv_wnd, MAX_TCP_WINDOW);
+				tcp_send_fin(sk);
+			}
+		} else {
+			tcp_set_state(sk, TCP_CLOSE);
+			tcp_send_active_reset(sk, sk->sk_allocation);
+		}
 	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
@@ -3040,6 +3053,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		else
 			tp->recvmsg_inq = val;
 		break;
+	case TCP_CLOSE_NORST:
+		tp->norst = !!val;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -3523,6 +3539,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		return err;
 	}
 #endif
+	case TCP_CLOSE_NORST:
+		val = tp->norst;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index aebb29ab2fdf..e0aa6e126700 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6054,7 +6054,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			break;
 		}
 
-		if (tp->linger2 < 0) {
+		if (likely(!tp->norst) && tp->linger2 < 0) {
 			tcp_done(sk);
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
 			return 1;
@@ -6064,9 +6064,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			/* Receive out of order FIN after close() */
 			if (tp->syn_fastopen && th->fin)
 				tcp_fastopen_active_disable(sk);
-			tcp_done(sk);
-			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
-			return 1;
+
+			if (likely(!tp->norst)) {
+				tcp_done(sk);
+				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+				return 1;
+			}
 		}
 
 		tmo = tcp_fin_time(sk);
@@ -6123,6 +6126,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_shutdown & RCV_SHUTDOWN) {
 			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
+				if (unlikely(tp->norst)) {
+					tcp_send_ack(sk);
+					goto discard;
+				}
+
 				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
 				tcp_reset(sk);
 				return 1;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f867658b4b30..48a9d5351478 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -133,6 +133,20 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			return TCP_TW_SUCCESS;
 		}
 
+		if (tcptw->tw_norst) {
+			/* ack and discard new data */
+			tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+			if (tmp_opt.saw_tstamp) {
+				tcptw->tw_ts_recent_stamp = get_seconds();
+				tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
+			}
+
+			if (th->fin) /* active remote close, we can die now */
+				inet_twsk_deschedule_put(tw);
+
+			return TCP_TW_ACK;
+		}
+
 		/* New data or FIN. If new data arrive after half-duplex close,
 		 * reset.
 		 */
@@ -272,6 +286,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
 		tcptw->tw_ts_offset	= tp->tsoffset;
 		tcptw->tw_last_oow_ack_time = 0;
+		tcptw->tw_norst		= tp->norst;
 
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
-- 
2.17.0

^ permalink raw reply related

* [PATCH RFC net-next 0/1] tcp: close socket without reset on incoming data
From: Debabrata Banerjee @ 2018-05-18 19:01 UTC (permalink / raw)
  To: David S . Miller, netdev; +Cc: Alexey Kuznetsov, Hideaki YOSHIFUJI, dbanerje

There is a basic problem with TCP sockets, where sending and closing of
data is unreliable. One good example of this is a web server that wants
to send an error back on a HTTP POST and close the socket, however
assuming the POST was of any significant size what really happens is
that the browser gets a broken socket while it is trying to post, and
never reads the error, possibly retrying the whole POST a number of
times. This has been well documented by other people, for example this
blog post:

https://blog.netherlabs.nl/articles/2009/01/18/the-ultimate-so_linger-page-or-why-is-my-tcp-not-reliable

Without this patch, our server application has to hang on to a socket
and sink all of the POST data, eating up memory and CPU. With this patch
the task is offloaded to the kernel, which uses only a timewait socket
to efficiently ack and discard any incoming data. We've been using a
similar patch internally for years, I think it has applications for
everyone.

Debabrata Banerjee (1):
  tcp: close socket without reset on incoming data

 include/linux/tcp.h      |  4 +++-
 include/uapi/linux/tcp.h |  2 +-
 net/ipv4/tcp.c           | 23 +++++++++++++++++++++--
 net/ipv4/tcp_input.c     | 16 ++++++++++++----
 net/ipv4/tcp_minisocks.c | 15 +++++++++++++++
 5 files changed, 52 insertions(+), 8 deletions(-)

-- 
2.17.0

^ permalink raw reply

* Re: WARNING in ip_recv_error
From: Willem de Bruijn @ 2018-05-18 18:59 UTC (permalink / raw)
  To: David Miller
  Cc: Eric Dumazet, DaeLyong Jeong, Alexey Kuznetsov, Hideaki YOSHIFUJI,
	Network Development, LKML, Byoungyoung Lee, Kyungtae Kim,
	bammanag, Willem de Bruijn
In-Reply-To: <CAF=yD-+QLz63Z6h5OpC-ar+nHvpCpkVi79h9Vtn=7fzmHCK8Lg@mail.gmail.com>

On Fri, May 18, 2018 at 2:46 PM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> On Fri, May 18, 2018 at 2:44 PM, Willem de Bruijn
> <willemdebruijn.kernel@gmail.com> wrote:
>> On Fri, May 18, 2018 at 1:09 PM, Willem de Bruijn
>> <willemdebruijn.kernel@gmail.com> wrote:
>>> On Fri, May 18, 2018 at 11:44 AM, David Miller <davem@davemloft.net> wrote:
>>>> From: Eric Dumazet <eric.dumazet@gmail.com>
>>>> Date: Fri, 18 May 2018 08:30:43 -0700
>>>>
>>>>> We probably need to revert Willem patch (7ce875e5ecb8562fd44040f69bda96c999e38bbc)
>>>>
>>>> Is it really valid to reach ip_recv_err with an ipv6 socket?
>>>
>>> I guess the issue is that setsockopt IPV6_ADDRFORM is not an
>>> atomic operation, so that the socket is neither fully ipv4 nor fully
>>> ipv6 by the time it reaches ip_recv_error.
>>>
>>>   sk->sk_socket->ops = &inet_dgram_ops;
>>>   < HERE >
>>>   sk->sk_family = PF_INET;
>>>
>>> Even calling inet_recv_error to demux would not necessarily help.
>>>
>>> Safest would be to look up by skb->protocol, similar to what
>>> ipv6_recv_error does to handle v4-mapped-v6.
>>>
>>> Or to make that function safe with PF_INET and swap the order
>>> of the above two operations.
>>>
>>> All sound needlessly complicated for this rare socket option, but
>>> I don't have a better idea yet. Dropping on the floor is not nice,
>>> either.
>>
>> Ensuring that ip_recv_error correctly handles packets from either
>> socket and removing the warning should indeed be good.
>>
>> It is robust against v4-mapped packets from an AF_INET6 socket,
>> but see caveat on reconnect below.
>>
>> The code between ipv6_recv_error for v4-mapped addresses and
>> ip_recv_error is essentially the same, the main difference being
>> whether to return network headers as sockaddr_in with SOL_IP
>> or sockaddr_in6 with SOL_IPV6.
>>
>> There are very few other locations in the stack that explicitly test
>> sk_family in this way and thus would be vulnerable to races with
>> IPV6_ADDRFORM.
>>
>> I'm not sure whether it is possible for a udpv6 socket to queue a
>> real ipv6 packet on the error queue, disconnect, connect to an
>> ipv4 address, call IPV6_ADDRFORM and then call ip_recv_error
>> on a true ipv6 packet. That would return buggy data, e.g., in
>> msg_name.
>
> In do_ipv6_setsockopt IPV6_ADDRFORM we can test that the
> error queue is empty, and then take its lock for the duration of the
> operation.

Actually, no reason to hold the lock. This setsockopt holds the socket
lock, which connect would need, too. So testing that the queue
is empty after testing that it is connected to a v4 address is
sufficient to ensure that no ipv6 packets are queued for reception.

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4d780c7f0130..a975d6311341 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -199,6 +199,11 @@ static int do_ipv6_setsockopt(struct sock *sk,
int level, int optname,

                        if (ipv6_only_sock(sk) ||
                            !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
                                retv = -EADDRNOTAVAIL;
                                break;
                        }

+                       if (!skb_queue_empty(&sk->sk_error_queue)) {
+                               retv = -EBUSY;
+                               break;
+                       }
+
                        fl6_free_socklist(sk);
                        __ipv6_sock_mc_close(sk);

After this it should be safe to remove the warning in ip_recv_error.

^ permalink raw reply related

* [net-next] Revert "ixgbe: release lock for the duration of ixgbe_suspend_close()"
From: Jeff Kirsher @ 2018-05-18 18:58 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, nhorman, sassmann, jogreene

This reverts commit 6710f970d9979d8f03f6e292bb729b2ee1526d0e.

Gotta love when developers have offline discussions, thinking everyone
is reading their responses/dialog.

The change had the potential for a number of race conditions on
shutdown, which is why we are reverting the change.

Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 5ddfb93ed491..a52d92e182ee 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6698,15 +6698,8 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
 	rtnl_lock();
 	netif_device_detach(netdev);
 
-	if (netif_running(netdev)) {
-		/* Suspend takes a long time, device_shutdown may be
-		 * parallelized this function, so drop lock for the
-		 * duration of this call.
-		 */
-		rtnl_unlock();
+	if (netif_running(netdev))
 		ixgbe_close_suspend(adapter);
-		rtnl_lock();
-	}
 
 	ixgbe_clear_interrupt_scheme(adapter);
 	rtnl_unlock();
-- 
2.17.0

^ permalink raw reply related

* Re: WARNING in ip_recv_error
From: Willem de Bruijn @ 2018-05-18 18:46 UTC (permalink / raw)
  To: David Miller
  Cc: Eric Dumazet, DaeLyong Jeong, Alexey Kuznetsov, Hideaki YOSHIFUJI,
	Network Development, LKML, Byoungyoung Lee, Kyungtae Kim,
	bammanag, Willem de Bruijn
In-Reply-To: <CAF=yD-LvZk+gdyf_Kq1j+hoJBmFnTKgR=DResvMAC7tEJCgGJw@mail.gmail.com>

On Fri, May 18, 2018 at 2:44 PM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> On Fri, May 18, 2018 at 1:09 PM, Willem de Bruijn
> <willemdebruijn.kernel@gmail.com> wrote:
>> On Fri, May 18, 2018 at 11:44 AM, David Miller <davem@davemloft.net> wrote:
>>> From: Eric Dumazet <eric.dumazet@gmail.com>
>>> Date: Fri, 18 May 2018 08:30:43 -0700
>>>
>>>> We probably need to revert Willem patch (7ce875e5ecb8562fd44040f69bda96c999e38bbc)
>>>
>>> Is it really valid to reach ip_recv_err with an ipv6 socket?
>>
>> I guess the issue is that setsockopt IPV6_ADDRFORM is not an
>> atomic operation, so that the socket is neither fully ipv4 nor fully
>> ipv6 by the time it reaches ip_recv_error.
>>
>>   sk->sk_socket->ops = &inet_dgram_ops;
>>   < HERE >
>>   sk->sk_family = PF_INET;
>>
>> Even calling inet_recv_error to demux would not necessarily help.
>>
>> Safest would be to look up by skb->protocol, similar to what
>> ipv6_recv_error does to handle v4-mapped-v6.
>>
>> Or to make that function safe with PF_INET and swap the order
>> of the above two operations.
>>
>> All sound needlessly complicated for this rare socket option, but
>> I don't have a better idea yet. Dropping on the floor is not nice,
>> either.
>
> Ensuring that ip_recv_error correctly handles packets from either
> socket and removing the warning should indeed be good.
>
> It is robust against v4-mapped packets from an AF_INET6 socket,
> but see caveat on reconnect below.
>
> The code between ipv6_recv_error for v4-mapped addresses and
> ip_recv_error is essentially the same, the main difference being
> whether to return network headers as sockaddr_in with SOL_IP
> or sockaddr_in6 with SOL_IPV6.
>
> There are very few other locations in the stack that explicitly test
> sk_family in this way and thus would be vulnerable to races with
> IPV6_ADDRFORM.
>
> I'm not sure whether it is possible for a udpv6 socket to queue a
> real ipv6 packet on the error queue, disconnect, connect to an
> ipv4 address, call IPV6_ADDRFORM and then call ip_recv_error
> on a true ipv6 packet. That would return buggy data, e.g., in
> msg_name.

In do_ipv6_setsockopt IPV6_ADDRFORM we can test that the
error queue is empty, and then take its lock for the duration of the
operation.

^ permalink raw reply

* Re: WARNING in ip_recv_error
From: Willem de Bruijn @ 2018-05-18 18:44 UTC (permalink / raw)
  To: David Miller
  Cc: Eric Dumazet, DaeLyong Jeong, Alexey Kuznetsov, Hideaki YOSHIFUJI,
	Network Development, LKML, Byoungyoung Lee, Kyungtae Kim,
	bammanag, Willem de Bruijn
In-Reply-To: <CAF=yD-Kb4ZfJ3sMAnWEOB_s9Y=TTnLbRZxrDPiGZLO=trPjY5Q@mail.gmail.com>

On Fri, May 18, 2018 at 1:09 PM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> On Fri, May 18, 2018 at 11:44 AM, David Miller <davem@davemloft.net> wrote:
>> From: Eric Dumazet <eric.dumazet@gmail.com>
>> Date: Fri, 18 May 2018 08:30:43 -0700
>>
>>> We probably need to revert Willem patch (7ce875e5ecb8562fd44040f69bda96c999e38bbc)
>>
>> Is it really valid to reach ip_recv_err with an ipv6 socket?
>
> I guess the issue is that setsockopt IPV6_ADDRFORM is not an
> atomic operation, so that the socket is neither fully ipv4 nor fully
> ipv6 by the time it reaches ip_recv_error.
>
>   sk->sk_socket->ops = &inet_dgram_ops;
>   < HERE >
>   sk->sk_family = PF_INET;
>
> Even calling inet_recv_error to demux would not necessarily help.
>
> Safest would be to look up by skb->protocol, similar to what
> ipv6_recv_error does to handle v4-mapped-v6.
>
> Or to make that function safe with PF_INET and swap the order
> of the above two operations.
>
> All sound needlessly complicated for this rare socket option, but
> I don't have a better idea yet. Dropping on the floor is not nice,
> either.

Ensuring that ip_recv_error correctly handles packets from either
socket and removing the warning should indeed be good.

It is robust against v4-mapped packets from an AF_INET6 socket,
but see caveat on reconnect below.

The code between ipv6_recv_error for v4-mapped addresses and
ip_recv_error is essentially the same, the main difference being
whether to return network headers as sockaddr_in with SOL_IP
or sockaddr_in6 with SOL_IPV6.

There are very few other locations in the stack that explicitly test
sk_family in this way and thus would be vulnerable to races with
IPV6_ADDRFORM.

I'm not sure whether it is possible for a udpv6 socket to queue a
real ipv6 packet on the error queue, disconnect, connect to an
ipv4 address, call IPV6_ADDRFORM and then call ip_recv_error
on a true ipv6 packet. That would return buggy data, e.g., in
msg_name.

^ permalink raw reply

* cascaded switch
From: Ran Shalit @ 2018-05-18 18:35 UTC (permalink / raw)
  To: netdev

Hello,

I am trying to understand the concept of cascaded switch.
I haven't find much information on this topic.

Can anyone please explain the general concept, when is it used, and
why does the device tree need to know about cascaded switch ?

Thank you,
ranran

^ permalink raw reply

* [PATCH v2 3/3] sh_eth: add R8A77980 support
From: Sergei Shtylyov @ 2018-05-18 18:32 UTC (permalink / raw)
  To: netdev, devicetree, David S. Miller, Rob Herring
  Cc: Mark Rutland, linux-renesas-soc
In-Reply-To: <f30c98ff-f6da-9e7f-c637-49076a428885@cogentembedded.com>

Finally, add support for the DT probing of the R-Car V3H (AKA R8A77980) --
it's the only R-Car gen3 SoC having the GEther controller -- others have
only EtherAVB...

Based on the original (and large) patch by Vladimir Barinov.

Signed-off-by: Vladimir Barinov <vladimir.barinov@cogentembedded.com>
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>

---
Changes in version 2:
- added Simon's tag.

 Documentation/devicetree/bindings/net/sh_eth.txt |    1 
 drivers/net/ethernet/renesas/sh_eth.c            |   44 +++++++++++++++++++++++
 2 files changed, 45 insertions(+)

Index: net-next/Documentation/devicetree/bindings/net/sh_eth.txt
===================================================================
--- net-next.orig/Documentation/devicetree/bindings/net/sh_eth.txt
+++ net-next/Documentation/devicetree/bindings/net/sh_eth.txt
@@ -14,6 +14,7 @@ Required properties:
 	      "renesas,ether-r8a7791"  if the device is a part of R8A7791 SoC.
 	      "renesas,ether-r8a7793"  if the device is a part of R8A7793 SoC.
 	      "renesas,ether-r8a7794"  if the device is a part of R8A7794 SoC.
+	      "renesas,gether-r8a77980" if the device is a part of R8A77980 SoC.
 	      "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC.
 	      "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device.
 	      "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1
Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -753,6 +753,49 @@ static struct sh_eth_cpu_data rcar_gen2_
 	.rmiimode	= 1,
 	.magic		= 1,
 };
+
+/* R8A77980 */
+static struct sh_eth_cpu_data r8a77980_data = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
+	.set_duplex	= sh_eth_set_duplex,
+	.set_rate	= sh_eth_set_rate_gether,
+
+	.register_type  = SH_ETH_REG_GIGABIT,
+
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
+	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD | ECSR_MPD,
+	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP |
+			  ECSIPR_MPDIP,
+	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
+			  EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
+			  EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
+			  EESIPR_RMAFIP | EESIPR_RRFIP |
+			  EESIPR_RTLFIP | EESIPR_RTSFIP |
+			  EESIPR_PREIP | EESIPR_CERFIP,
+
+	.tx_check       = EESR_FTC | EESR_CD | EESR_RTO,
+	.eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
+			  EESR_RFE | EESR_RDE | EESR_RFRMER |
+			  EESR_TFE | EESR_TDE | EESR_ECI,
+	.fdr_value	= 0x0000070f,
+
+	.apr		= 1,
+	.mpr		= 1,
+	.tpauser	= 1,
+	.bculr		= 1,
+	.hw_swap	= 1,
+	.nbst		= 1,
+	.rpadir		= 1,
+	.rpadir_value   = 2 << 16,
+	.no_trimd	= 1,
+	.no_ade		= 1,
+	.xdfar_rw	= 1,
+	.hw_checksum	= 1,
+	.select_mii	= 1,
+	.magic		= 1,
+	.cexcr		= 1,
+};
 #endif /* CONFIG_OF */
 
 static void sh_eth_set_rate_sh7724(struct net_device *ndev)
@@ -3134,6 +3177,7 @@ static const struct of_device_id sh_eth_
 	{ .compatible = "renesas,ether-r8a7791", .data = &rcar_gen2_data },
 	{ .compatible = "renesas,ether-r8a7793", .data = &rcar_gen2_data },
 	{ .compatible = "renesas,ether-r8a7794", .data = &rcar_gen2_data },
+	{ .compatible = "renesas,gether-r8a77980", .data = &r8a77980_data },
 	{ .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data },
 	{ .compatible = "renesas,rcar-gen1-ether", .data = &rcar_gen1_data },
 	{ .compatible = "renesas,rcar-gen2-ether", .data = &rcar_gen2_data },

^ permalink raw reply

* [PATCH v2 2/3] sh_eth: add EDMR.NBST support
From: Sergei Shtylyov @ 2018-05-18 18:31 UTC (permalink / raw)
  To: netdev, devicetree, David S. Miller, Rob Herring
  Cc: Mark Rutland, linux-renesas-soc
In-Reply-To: <f30c98ff-f6da-9e7f-c637-49076a428885@cogentembedded.com>

The R-Car V3H (AKA R8A77980) GEther controller adds the DMA burst mode bit
(NBST) in EDMR and the manual tells to always set it before doing any DMA.

Based on the original (and large) patch by Vladimir Barinov.

Signed-off-by: Vladimir Barinov <vladimir.barinov@cogentembedded.com>
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>

---
Changes in version 2:
- added Simon's tag.

 drivers/net/ethernet/renesas/sh_eth.c |    4 ++++
 drivers/net/ethernet/renesas/sh_eth.h |    2 ++
 2 files changed, 6 insertions(+)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -1434,6 +1434,10 @@ static int sh_eth_dev_init(struct net_de
 
 	sh_eth_write(ndev, mdp->cd->trscer_err_mask, TRSCER);
 
+	/* DMA transfer burst mode */
+	if (mdp->cd->nbst)
+		sh_eth_modify(ndev, EDMR, EDMR_NBST, EDMR_NBST);
+
 	if (mdp->cd->bculr)
 		sh_eth_write(ndev, 0x800, BCULR);	/* Burst sycle set */
 
Index: net-next/drivers/net/ethernet/renesas/sh_eth.h
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.h
+++ net-next/drivers/net/ethernet/renesas/sh_eth.h
@@ -184,6 +184,7 @@ enum GECMR_BIT {
 
 /* EDMR */
 enum DMAC_M_BIT {
+	EDMR_NBST = 0x80,
 	EDMR_EL = 0x40, /* Litte endian */
 	EDMR_DL1 = 0x20, EDMR_DL0 = 0x10,
 	EDMR_SRST_GETHER = 0x03,
@@ -505,6 +506,7 @@ struct sh_eth_cpu_data {
 	unsigned bculr:1;	/* EtherC have BCULR */
 	unsigned tsu:1;		/* EtherC have TSU */
 	unsigned hw_swap:1;	/* E-DMAC have DE bit in EDMR */
+	unsigned nbst:1;	/* E-DMAC has NBST bit in EDMR */
 	unsigned rpadir:1;	/* E-DMAC have RPADIR */
 	unsigned no_trimd:1;	/* E-DMAC DO NOT have TRIMD */
 	unsigned no_ade:1;	/* E-DMAC DO NOT have ADE bit in EESR */

^ permalink raw reply

* [PATCH v2 1/3] sh_eth: add RGMII support
From: Sergei Shtylyov @ 2018-05-18 18:30 UTC (permalink / raw)
  To: netdev, David S. Miller; +Cc: linux-renesas-soc
In-Reply-To: <f30c98ff-f6da-9e7f-c637-49076a428885@cogentembedded.com>

The R-Car V3H (AKA R8A77980) GEther controller adds support for the RGMII
PHY interface mode as a new value for the RMII_MII register.

Based on the original (and large) patch by Vladimir Barinov.

Signed-off-by: Vladimir Barinov <vladimir.barinov@cogentembedded.com>
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
Changes in version 2:
- included PHY_INTERFACE_MODE_RGMII_{|RX|TX}ID in the RGMII *case*.

 drivers/net/ethernet/renesas/sh_eth.c |    3 +++
 1 file changed, 3 insertions(+)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -466,6 +466,9 @@ static void sh_eth_select_mii(struct net
 	u32 value;
 
 	switch (mdp->phy_interface) {
+	case PHY_INTERFACE_MODE_RGMII ... PHY_INTERFACE_MODE_RGMII_TXID:
+		value = 0x3;
+		break;
 	case PHY_INTERFACE_MODE_GMII:
 		value = 0x2;
 		break;

^ permalink raw reply

* [PATCH v2 0/3] Add Renesas R8A77980 GEther support
From: Sergei Shtylyov @ 2018-05-18 18:28 UTC (permalink / raw)
  To: netdev, devicetree, David S. Miller, Rob Herring
  Cc: Mark Rutland, linux-renesas-soc

Hello!

Here's a set of 3 patches against DaveM's 'net-next.git' repo. They (gradually)
add R8A77980 GEther support to the 'sh_eth' driver, starting with a couple of new
register bits/values introduced with this chip, and ending with adding a new
'struct sh_eth_cpu_data' instance connected to the new DT "compatible" prop
value...

[1/3] sh_eth: add RGMII support
[2/3] sh_eth: add EDMR.NBST support
[3/3] sh_eth: add R8A77980 support

MBR, Sergei

^ permalink raw reply

* [PATCH] selftests: bpf: config: enable NET_SCH_INGRESS for xdp_meta.sh
From: Anders Roxell @ 2018-05-18 18:23 UTC (permalink / raw)
  To: ast, daniel, shuah; +Cc: netdev, linux-kernel, linux-kselftest, Anders Roxell

When running bpf's selftest test_xdp_meta.sh it fails:
./test_xdp_meta.sh
Error: Specified qdisc not found.
selftests: test_xdp_meta [FAILED]

Need to enable CONFIG_NET_SCH_INGRESS and CONFIG_NET_CLS_ACT to get the
test to pass.

Fixes: 22c8852624fc ("bpf: improve selftests and add tests for meta pointer")
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
 tools/testing/selftests/bpf/config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 983dd25d49f4..1eefe211a4a8 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y
 CONFIG_TEST_BPF=m
 CONFIG_CGROUP_BPF=y
 CONFIG_NETDEVSIM=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_SCH_INGRESS=y
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH v2 net-next] net: stmmac: Populate missing callbacks in HWIF initialization
From: David Miller @ 2018-05-18 17:56 UTC (permalink / raw)
  To: Jose.Abreu
  Cc: netdev, clabbe.montjoie, Joao.Pinto, peppe.cavallaro,
	alexandre.torgue
In-Reply-To: <6f26505abcb91747df73f09019c7e2bcb0c3835a.1526658592.git.joabreu@synopsys.com>

From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Fri, 18 May 2018 16:54:38 +0100

> Some HW specific setups, like sun8i, do not populate all the necessary
> callbacks, which is what HWIF helpers were expecting.
> 
> Fix this by always trying to get the generic helpers and populate them
> if they were not previously populated by HW specific setup.
> 
> Signed-off-by: Jose Abreu <joabreu@synopsys.com>
> Fixes: 5f0456b43140 ("net: stmmac: Implement logic to automatically
> select HW Interface")

Please don't split up Fixes: tag lines like this in the future.  No matter
how long it is, keep it a single line.

> Reported-by: Corentin Labbe <clabbe.montjoie@gmail.com>
> Tested-by: Corentin Labbe <clabbe.montjoie@gmail.com>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH net] cxgb4: fix offset in collecting TX rate limit info
From: David Miller @ 2018-05-18 17:55 UTC (permalink / raw)
  To: rahul.lakkireddy; +Cc: netdev, ganeshgr, nirranjan, indranil
In-Reply-To: <1526651017-20795-1-git-send-email-rahul.lakkireddy@chelsio.com>

From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 18 May 2018 19:13:37 +0530

> Correct the indirect register offsets in collecting TX rate limit info
> in UP CIM logs.
> 
> Also, T5 doesn't support these indirect register offsets, so remove
> them from collection logic.
> 
> Fixes: be6e36d916b1 ("cxgb4: collect TX rate limit info in UP CIM logs")
> Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
> Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH net-next] cxgb4: collect SGE PF/VF queue map
From: David Miller @ 2018-05-18 17:55 UTC (permalink / raw)
  To: rahul.lakkireddy; +Cc: netdev, ganeshgr, nirranjan, indranil
In-Reply-To: <1526650973-20752-1-git-send-email-rahul.lakkireddy@chelsio.com>

From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 18 May 2018 19:12:53 +0530

> For T6, collect info on queue mapping to corresponding PF/VF in SGE.
> 
> Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
> Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>

Applied.

^ permalink raw reply

* Re: [PATCH net] net: sched: red: avoid hashing NULL child
From: David Miller @ 2018-05-18 17:53 UTC (permalink / raw)
  To: pabeni; +Cc: netdev, jhs, xiyou.wangcong, jiri, liuhangbin, jkosina
In-Reply-To: <286744ab66eaa07d61d5d8ce8a07cb0b1615d35c.1526647813.git.pabeni@redhat.com>

From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 18 May 2018 14:51:44 +0200

> Hangbin reported an Oops triggered by the syzkaller qdisc rules:
 ...
> When a red qdisc is updated with a 0 limit, the child qdisc is left
> unmodified, no additional scheduler is created in red_change(),
> the 'child' local variable is rightfully NULL and must not add it
> to the hash table.
> 
> This change addresses the above issue moving qdisc_hash_add() right
> after the child qdisc creation. It additionally removes unneeded checks
> for noop_qdisc.
> 
> Reported-by: Hangbin Liu <liuhangbin@gmail.com>
> Fixes: 49b499718fa1 ("net: sched: make default fifo qdiscs appear in the dump")
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>

Applied and queued up for -stable, thanks Paolo.

^ permalink raw reply

* Re: [PATCH 1/4] arcnet: com20020: Add com20020 io mapped version
From: David Miller @ 2018-05-18 17:51 UTC (permalink / raw)
  To: andrea.greco.gapmilano; +Cc: tobin, a.greco, m.grzeschik, linux-kernel, netdev
In-Reply-To: <CAPoXtQJJ-Fv6dBWxFs+r+B-+jhQAF+AxdqDg9g55AwSyBJ0U6g@mail.gmail.com>

From: Andrea Greco <andrea.greco.gapmilano@gmail.com>
Date: Fri, 18 May 2018 14:18:41 +0200

> In com20020.c found this:
> /* FIXME: do this some other way! */
> if (!dev->dev_addr[0])
> dev->dev_addr[0] = arcnet_inb(ioaddr, 8);
> 
> NODE-ID must be unique across the whole ARCNET network.
> My previous idea was to take a random value, but this could create a
> collision on the network.
> 
> A possible solution is:
> In case of a collision, the com20020 sets a bit in the status register.
> Then pick a new NODE-ID and repeat this until a valid NODE-ID is found.
> 
> Another idea is to pass it via DTS.
> But suppose there are 2 identical products in the same network: same address, same problem.
> For this reason I prefer to keep the standard driver behavior.
> 
> Other ideas for solve this ?

Is there no way to obtain a unique value from the device?

If having a unique ID to talk on the ARCNET is so critical, there must
be some way to properly allocate and use a unique ID.

I guess this must be a general problem with this driver already.

You still need to address the issue of 'dev' being leaked on probe
error paths.

Thank you.

^ permalink raw reply

* Re: [PATCH] net: mvpp2: typo and cosmetic fixes
From: David Miller @ 2018-05-18 17:48 UTC (permalink / raw)
  To: antoine.tenart
  Cc: netdev, linux-kernel, thomas.petazzoni, maxime.chevallier,
	gregory.clement, miquel.raynal, nadavh, stefanc, ymarkman, mw
In-Reply-To: <20180518123451.31365-1-antoine.tenart@bootlin.com>

From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Fri, 18 May 2018 14:34:51 +0200

> This patch on the Marvell PPv2 driver is only cosmetic. Two typos are
> removed as well as other cosmetic fixes, such as extra new lines or tabs
> vs spaces.
> 
> Suggested-by: Stefan Chulski <stefanc@marvell.com>
> Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net] sock_diag: fix use-after-free read in __sk_free
From: David Miller @ 2018-05-18 17:48 UTC (permalink / raw)
  To: edumazet; +Cc: netdev, eric.dumazet, kraig
In-Reply-To: <20180518114755.56152-1-edumazet@google.com>

From: Eric Dumazet <edumazet@google.com>
Date: Fri, 18 May 2018 04:47:55 -0700

> We must not call sock_diag_has_destroy_listeners(sk) on a socket
> that has no reference on net structure.
 ...
> Fixes: b922622ec6ef ("sock_diag: don't broadcast kernel sockets")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Craig Gallek <kraig@google.com>
> Reported-by: syzbot <syzkaller@googlegroups.com>

Applied and queued up for -stable, thanks Eric.

^ permalink raw reply

* Re: [PATCH v2] sh_eth: Change platform check to CONFIG_ARCH_RENESAS
From: David Miller @ 2018-05-18 17:46 UTC (permalink / raw)
  To: geert+renesas; +Cc: sergei.shtylyov, netdev, linux-renesas-soc
In-Reply-To: <1526640771-473-1-git-send-email-geert+renesas@glider.be>

From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 18 May 2018 12:52:51 +0200

> Since commit 9b5ba0df4ea4f940 ("ARM: shmobile: Introduce ARCH_RENESAS")
> is CONFIG_ARCH_RENESAS a more appropriate platform check than the legacy
> CONFIG_ARCH_SHMOBILE, hence use the former.
> 
> Renesas SuperH SH-Mobile SoCs are still covered by the CONFIG_CPU_SH4
> check.
> 
> This will allow to drop ARCH_SHMOBILE on ARM and ARM64 in the near
> future.
> 
> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
> Reviewed-by: Simon Horman <horms+renesas@verge.net.au>

Applied.

^ permalink raw reply

* Re: [PATCH] hippi: fix spelling mistake: "Framming" -> "Framing"
From: David Miller @ 2018-05-18 17:46 UTC (permalink / raw)
  To: colin.king; +Cc: jes, linux-hippi, netdev, kernel-janitors, linux-kernel
In-Reply-To: <20180518100922.28790-1-colin.king@canonical.com>

From: Colin King <colin.king@canonical.com>
Date: Fri, 18 May 2018 11:09:22 +0100

> From: Colin Ian King <colin.king@canonical.com>
> 
> Trivial fix to spelling mistake in printk message text
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Applied.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox