Netdev List
 help / color / mirror / Atom feed
* [PATCH net 2/5] net: dsa: b53: Properly account for VLAN filtering
From: Florian Fainelli @ 2019-02-15 20:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, andrew, vivien.didelot, Florian Fainelli
In-Reply-To: <20190215201653.20988-1-f.fainelli@gmail.com>

VLAN filtering can be built into the kernel, and also dynamically turned
on/off through the bridge master device. Allow re-configuring the switch
appropriately to account for that by deciding whether VLAN table
(v_table) misses should lead to a drop or forward.

Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/b53/b53_common.c | 59 +++++++++++++++++++++++++++++---
 drivers/net/dsa/b53/b53_priv.h   |  3 ++
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 964a9ec4652a..2fef4c564420 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -344,7 +344,8 @@ static void b53_set_forwarding(struct b53_device *dev, int enable)
 	b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_CTRL, mgmt);
 }
 
-static void b53_enable_vlan(struct b53_device *dev, bool enable)
+static void b53_enable_vlan(struct b53_device *dev, bool enable,
+			    bool enable_filtering)
 {
 	u8 mgmt, vc0, vc1, vc4 = 0, vc5;
 
@@ -369,8 +370,13 @@ static void b53_enable_vlan(struct b53_device *dev, bool enable)
 		vc0 |= VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID;
 		vc1 |= VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN;
 		vc4 &= ~VC4_ING_VID_CHECK_MASK;
-		vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S;
-		vc5 |= VC5_DROP_VTABLE_MISS;
+		if (enable_filtering) {
+			vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S;
+			vc5 |= VC5_DROP_VTABLE_MISS;
+		} else {
+			vc4 |= VC4_ING_VID_VIO_FWD << VC4_ING_VID_CHECK_S;
+			vc5 &= ~VC5_DROP_VTABLE_MISS;
+		}
 
 		if (is5325(dev))
 			vc0 &= ~VC0_RESERVED_1;
@@ -420,6 +426,9 @@ static void b53_enable_vlan(struct b53_device *dev, bool enable)
 	}
 
 	b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt);
+
+	dev->vlan_enabled = enable;
+	dev->vlan_filtering_enabled = enable_filtering;
 }
 
 static int b53_set_jumbo(struct b53_device *dev, bool enable, bool allow_10_100)
@@ -656,7 +665,7 @@ int b53_configure_vlan(struct dsa_switch *ds)
 		b53_do_vlan_op(dev, VTA_CMD_CLEAR);
 	}
 
-	b53_enable_vlan(dev, false);
+	b53_enable_vlan(dev, false, dev->vlan_filtering_enabled);
 
 	b53_for_each_port(dev, i)
 		b53_write16(dev, B53_VLAN_PAGE,
@@ -1265,6 +1274,46 @@ EXPORT_SYMBOL(b53_phylink_mac_link_up);
 
 int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering)
 {
+	struct b53_device *dev = ds->priv;
+	struct net_device *bridge_dev;
+	unsigned int i;
+	u16 pvid, new_pvid;
+
+	/* Handle the case were multiple bridges span the same switch device
+	 * and one of them has a different setting than what is being requested
+	 * which would be breaking filtering semantics for any of the other
+	 * bridge devices.
+	 */
+	b53_for_each_port(dev, i) {
+		bridge_dev = dsa_to_port(ds, i)->bridge_dev;
+		if (bridge_dev &&
+		    bridge_dev != dsa_to_port(ds, port)->bridge_dev &&
+		    br_vlan_enabled(bridge_dev) != vlan_filtering) {
+			netdev_err(bridge_dev,
+				   "VLAN filtering is global to the switch!\n");
+			return -EINVAL;
+		}
+	}
+
+	b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid);
+	new_pvid = pvid;
+	if (dev->vlan_filtering_enabled && !vlan_filtering) {
+		/* Filtering is currently enabled, use the default PVID since
+		 * the bridge does not expect tagging anymore
+		 */
+		dev->ports[port].pvid = pvid;
+		new_pvid = b53_default_pvid(dev);
+	} else if (!dev->vlan_filtering_enabled && vlan_filtering) {
+		/* Filtering is currently disabled, restore the previous PVID */
+		new_pvid = dev->ports[port].pvid;
+	}
+
+	if (pvid != new_pvid)
+		b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port),
+			    new_pvid);
+
+	b53_enable_vlan(dev, dev->vlan_enabled, vlan_filtering);
+
 	return 0;
 }
 EXPORT_SYMBOL(b53_vlan_filtering);
@@ -1280,7 +1329,7 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port,
 	if (vlan->vid_end > dev->num_vlans)
 		return -ERANGE;
 
-	b53_enable_vlan(dev, true);
+	b53_enable_vlan(dev, true, dev->vlan_filtering_enabled);
 
 	return 0;
 }
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index ec796482792d..4dc7ee38b258 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -91,6 +91,7 @@ enum {
 struct b53_port {
 	u16		vlan_ctl_mask;
 	struct ethtool_eee eee;
+	u16		pvid;
 };
 
 struct b53_vlan {
@@ -137,6 +138,8 @@ struct b53_device {
 
 	unsigned int num_vlans;
 	struct b53_vlan *vlans;
+	bool vlan_enabled;
+	bool vlan_filtering_enabled;
 	unsigned int num_ports;
 	struct b53_port *ports;
 };
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 1/5] net: dsa: b53: Fix default VLAN ID
From: Florian Fainelli @ 2019-02-15 20:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, andrew, vivien.didelot, Florian Fainelli
In-Reply-To: <20190215201653.20988-1-f.fainelli@gmail.com>

We were not consistent in how the default VID of a given port was
defined, b53_br_leave() would make sure the VLAN ID would be either 0/1
depending on the switch generation, but b53_configure_vlan(), which is
the default configuration would unconditionally set it to 1. The correct
value is 1 for 5325/5365 series and 0 otherwise. To avoid repeating that
mistake ever again, introduce a helper function: b53_default_pvid() to
factor that out.

Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/b53/b53_common.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 0e4bbdcc614f..964a9ec4652a 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -632,15 +632,25 @@ static void b53_enable_mib(struct b53_device *dev)
 	b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, gc);
 }
 
+static u16 b53_default_pvid(struct b53_device *dev)
+{
+	if (is5325(dev) || is5365(dev))
+		return 1;
+	else
+		return 0;
+}
+
 int b53_configure_vlan(struct dsa_switch *ds)
 {
 	struct b53_device *dev = ds->priv;
 	struct b53_vlan vl = { 0 };
-	int i;
+	int i, def_vid;
+
+	def_vid = b53_default_pvid(dev);
 
 	/* clear all vlan entries */
 	if (is5325(dev) || is5365(dev)) {
-		for (i = 1; i < dev->num_vlans; i++)
+		for (i = def_vid; i < dev->num_vlans; i++)
 			b53_set_vlan_entry(dev, i, &vl);
 	} else {
 		b53_do_vlan_op(dev, VTA_CMD_CLEAR);
@@ -650,7 +660,7 @@ int b53_configure_vlan(struct dsa_switch *ds)
 
 	b53_for_each_port(dev, i)
 		b53_write16(dev, B53_VLAN_PAGE,
-			    B53_VLAN_PORT_DEF_TAG(i), 1);
+			    B53_VLAN_PORT_DEF_TAG(i), def_vid);
 
 	if (!is5325(dev) && !is5365(dev))
 		b53_set_jumbo(dev, dev->enable_jumbo, false);
@@ -1326,12 +1336,8 @@ int b53_vlan_del(struct dsa_switch *ds, int port,
 
 		vl->members &= ~BIT(port);
 
-		if (pvid == vid) {
-			if (is5325(dev) || is5365(dev))
-				pvid = 1;
-			else
-				pvid = 0;
-		}
+		if (pvid == vid)
+			pvid = b53_default_pvid(dev);
 
 		if (untagged && !dsa_is_cpu_port(ds, port))
 			vl->untag &= ~(BIT(port));
@@ -1644,10 +1650,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br)
 	b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan);
 	dev->ports[port].vlan_ctl_mask = pvlan;
 
-	if (is5325(dev) || is5365(dev))
-		pvid = 1;
-	else
-		pvid = 0;
+	pvid = b53_default_pvid(dev);
 
 	/* Make this port join all VLANs without VLAN entries */
 	if (is58xx(dev)) {
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 0/5] net: dsa: b53: VLAN and L2 fixes
From: Florian Fainelli @ 2019-02-15 20:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, andrew, vivien.didelot, Florian Fainelli

Hi David,

This patch series contains a collection of fixes to the b53 driver in
order to:

- consistently program the same default VLAN ID when a port is bridged
  or not
- properly account for VLAN filtering being turned on/off and turning
  on ingress VID checking accordingly
- have SYSTEMPORT properly forward BPDU frames to the network stack
  (which it did not)
- do not assume that WoL is supported by the DSA master network device
  we are connected to

Thank you!

Florian Fainelli (5):
  net: dsa: b53: Fix default VLAN ID
  net: dsa: b53: Properly account for VLAN filtering
  net: systemport: Fix reception of BPDUs
  net: dsa: bcm_sf2: Do not assume DSA master supports WoL
  net: dsa: b53: Do not program CPU port's PVID

 drivers/net/dsa/b53/b53_common.c           | 90 +++++++++++++++++-----
 drivers/net/dsa/b53/b53_priv.h             |  3 +
 drivers/net/dsa/bcm_sf2.c                  | 10 ++-
 drivers/net/ethernet/broadcom/bcmsysport.c |  4 +
 4 files changed, 84 insertions(+), 23 deletions(-)

-- 
2.17.1


^ permalink raw reply

* [PATCH 1/2] MIPS: eBPF: Always return sign extended 32b values
From: Paul Burton @ 2019-02-15 20:14 UTC (permalink / raw)
  To: linux-mips@vger.kernel.org, Alexei Starovoitov, Daniel Borkmann,
	Martin KaFai Lau, Song Liu, Yonghong Song, netdev@vger.kernel.org
  Cc: linux-kernel@vger.kernel.org, Jiong Wang, Paul Burton,
	stable@vger.kernel.org

The function prototype used to call JITed eBPF code (ie. the type of the
struct bpf_prog bpf_func field) returns an unsigned int. The MIPS n64
ABI that MIPS64 kernels target defines that 32 bit integers should
always be sign extended when passed in registers as either arguments or
return values.

This means that when returning any value which may not already be sign
extended (ie. of type REG_64BIT or REG_32BIT_ZERO_EX) we need to perform
that sign extension in order to comply with the n64 ABI. Without this we
see strange looking test failures from test_bpf.ko, such as:

  test_bpf: #65 ALU64_MOV_X:
    dst = 4294967295 jited:1 ret -1 != -1 FAIL (1 times)

Although the return value printed matches the expected value, this is
only because printf is only examining the least significant 32 bits of
the 64 bit register value we returned. The register holding the expected
value is sign extended whilst the v0 register was set to a zero extended
value by our JITed code, so when compared by a conditional branch
instruction the values are not equal.

We already handle this when the return value register is of type
REG_32BIT_ZERO_EX, so simply extend this to also cover REG_64BIT.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: b6bd53f9c4e8 ("MIPS: Add missing file for eBPF JIT.")
Cc: stable@vger.kernel.org # v4.13+
---

 arch/mips/net/ebpf_jit.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index b16710a8a9e7..715415fa2345 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -343,12 +343,15 @@ static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg)
 	const struct bpf_prog *prog = ctx->skf;
 	int stack_adjust = ctx->stack_size;
 	int store_offset = stack_adjust - 8;
+	enum reg_val_type td;
 	int r0 = MIPS_R_V0;
 
-	if (dest_reg == MIPS_R_RA &&
-	    get_reg_val_type(ctx, prog->len, BPF_REG_0) == REG_32BIT_ZERO_EX)
+	if (dest_reg == MIPS_R_RA) {
 		/* Don't let zero extended value escape. */
-		emit_instr(ctx, sll, r0, r0, 0);
+		td = get_reg_val_type(ctx, prog->len, BPF_REG_0);
+		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX)
+			emit_instr(ctx, sll, r0, r0, 0);
+	}
 
 	if (ctx->flags & EBPF_SAVE_RA) {
 		emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP);
-- 
2.20.1


^ permalink raw reply related

* [PATCH 2/2] MIPS: eBPF: Remove REG_32BIT_ZERO_EX
From: Paul Burton @ 2019-02-15 20:14 UTC (permalink / raw)
  To: linux-mips@vger.kernel.org, Alexei Starovoitov, Daniel Borkmann,
	Martin KaFai Lau, Song Liu, Yonghong Song, netdev@vger.kernel.org
  Cc: linux-kernel@vger.kernel.org, Jiong Wang, Paul Burton
In-Reply-To: <20190215201321.15725-1-paul.burton@mips.com>

REG_32BIT_ZERO_EX and REG_64BIT are always handled in exactly the same
way, and reg_val_propagate_range() never actually sets any register to
type REG_32BIT_ZERO_EX.

Remove the redundant & unused REG_32BIT_ZERO_EX.

Signed-off-by: Paul Burton <paul.burton@mips.com>
---

 arch/mips/net/ebpf_jit.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 715415fa2345..76e9bf88d3b9 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -79,8 +79,6 @@ enum reg_val_type {
 	REG_64BIT_32BIT,
 	/* 32-bit compatible, need truncation for 64-bit ops. */
 	REG_32BIT,
-	/* 32-bit zero extended. */
-	REG_32BIT_ZERO_EX,
 	/* 32-bit no sign/zero extension needed. */
 	REG_32BIT_POS
 };
@@ -349,7 +347,7 @@ static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg)
 	if (dest_reg == MIPS_R_RA) {
 		/* Don't let zero extended value escape. */
 		td = get_reg_val_type(ctx, prog->len, BPF_REG_0);
-		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX)
+		if (td == REG_64BIT)
 			emit_instr(ctx, sll, r0, r0, 0);
 	}
 
@@ -695,7 +693,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		if (dst < 0)
 			return dst;
 		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) {
+		if (td == REG_64BIT) {
 			/* sign extend */
 			emit_instr(ctx, sll, dst, dst, 0);
 		}
@@ -710,7 +708,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		if (dst < 0)
 			return dst;
 		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) {
+		if (td == REG_64BIT) {
 			/* sign extend */
 			emit_instr(ctx, sll, dst, dst, 0);
 		}
@@ -724,7 +722,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		if (dst < 0)
 			return dst;
 		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX)
+		if (td == REG_64BIT)
 			/* sign extend */
 			emit_instr(ctx, sll, dst, dst, 0);
 		if (insn->imm == 1) {
@@ -863,13 +861,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		if (src < 0 || dst < 0)
 			return -EINVAL;
 		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) {
+		if (td == REG_64BIT) {
 			/* sign extend */
 			emit_instr(ctx, sll, dst, dst, 0);
 		}
 		did_move = false;
 		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
-		if (ts == REG_64BIT || ts == REG_32BIT_ZERO_EX) {
+		if (ts == REG_64BIT) {
 			int tmp_reg = MIPS_R_AT;
 
 			if (bpf_op == BPF_MOV) {
@@ -1257,8 +1255,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		if (insn->imm == 64 && td == REG_32BIT)
 			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
 
-		if (insn->imm != 64 &&
-		    (td == REG_64BIT || td == REG_32BIT_ZERO_EX)) {
+		if (insn->imm != 64 && td == REG_64BIT) {
 			/* sign extend */
 			emit_instr(ctx, sll, dst, dst, 0);
 		}
-- 
2.20.1


^ permalink raw reply related

* [PATCH][unix] missing barriers in some of unix_sock ->addr and ->path accesses
From: Al Viro @ 2019-02-15 20:09 UTC (permalink / raw)
  To: netdev; +Cc: David Miller

Several u->addr and u->path users are not holding any locks in
common with unix_bind().  unix_state_lock() is useless for those
purposes.

u->addr is assign-once and *(u->addr) is fully set up by the time
we set u->addr (all under unix_table_lock).  u->path is also
set in the same critical area, also before setting u->addr, and
any unix_sock with ->path filled will have non-NULL ->addr.

So setting ->addr with smp_store_release() is all we need for those
"lockless" users - just have them fetch ->addr with smp_load_acquire()
and don't even bother looking at ->path if they see NULL ->addr.

Users of ->addr and ->path fall into several classes now:
    1) ones that do smp_load_acquire(u->addr) and access *(u->addr)
and u->path only if smp_load_acquire() has returned non-NULL.
    2) places holding unix_table_lock.  These are guaranteed that
*(u->addr) is seen fully initialized.  If unix_sock is in one of the
"bound" chains, so's ->path.
    3) unix_sock_destructor() using ->addr is safe.  All places
that set u->addr are guaranteed to have seen all stores *(u->addr)
while holding a reference to u and unix_sock_destructor() is called
when (atomic) refcount hits zero.
    4) unix_release_sock() using ->path is safe.  unix_bind()
is serialized wrt unix_release() (normally - by struct file
refcount), and for the instances that had ->path set by unix_bind()
unix_release_sock() comes from unix_release(), so they are fine.
Instances that had it set in unix_stream_connect() either end up
attached to a socket (in unix_accept()), in which case the call
chain to unix_release_sock() and serialization are the same as in
the previous case, or they never get accept'ed and unix_release_sock()
is called when the listener is shut down and its queue gets purged.
In that case the listener's queue lock provides the barriers needed -
unix_stream_connect() shoves our unix_sock into listener's queue
under that lock right after having set ->path and eventual
unix_release_sock() caller picks them from that queue under the
same lock right before calling unix_release_sock().
    5) unix_find_other() use of ->path is pointless, but safe -
it happens with successful lookup by (abstract) name, so ->path.dentry
is guaranteed to be NULL there.

Cc: stable@kernel.org
earlier-variant-reviewed-by: "Paul E. McKenney" <paulmck@linux.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

---
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 74d1eed7cbd4..a95d479caeea 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -890,7 +890,7 @@ static int unix_autobind(struct socket *sock)
 	addr->hash ^= sk->sk_type;
 
 	__unix_remove_socket(sk);
-	u->addr = addr;
+	smp_store_release(&u->addr, addr);
 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
 	spin_unlock(&unix_table_lock);
 	err = 0;
@@ -1060,7 +1060,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	err = 0;
 	__unix_remove_socket(sk);
-	u->addr = addr;
+	smp_store_release(&u->addr, addr);
 	__unix_insert_socket(list, sk);
 
 out_unlock:
@@ -1331,15 +1331,29 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
 	otheru = unix_sk(other);
 
-	/* copy address information from listening to new sock*/
-	if (otheru->addr) {
-		refcount_inc(&otheru->addr->refcnt);
-		newu->addr = otheru->addr;
-	}
+	/* copy address information from listening to new sock
+	 *
+	 * The contents of *(otheru->addr) and otheru->path
+	 * are seen fully set up here, since we have found
+	 * otheru in hash under unix_table_lock.  Insertion
+	 * into the hash chain we'd found it in had been done
+	 * in an earlier critical area protected by unix_table_lock,
+	 * the same one where we'd set *(otheru->addr) contents,
+	 * as well as otheru->path and otheru->addr itself.
+	 *
+	 * Using smp_store_release() here to set newu->addr
+	 * is enough to make those stores, as well as stores
+	 * to newu->path visible to anyone who gets newu->addr
+	 * by smp_load_acquire().  IOW, the same warranties
+	 * as for unix_sock instances bound in unix_bind() or
+	 * in unix_autobind().
+	 */
 	if (otheru->path.dentry) {
 		path_get(&otheru->path);
 		newu->path = otheru->path;
 	}
+	refcount_inc(&otheru->addr->refcnt);
+	smp_store_release(&newu->addr, otheru->addr);
 
 	/* Set credentials */
 	copy_peercred(sk, other);
@@ -1453,7 +1467,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 {
 	struct sock *sk = sock->sk;
-	struct unix_sock *u;
+	struct unix_address *addr;
 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
 	int err = 0;
 
@@ -1468,19 +1482,15 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 		sock_hold(sk);
 	}
 
-	u = unix_sk(sk);
-	unix_state_lock(sk);
-	if (!u->addr) {
+	addr = smp_load_acquire(&unix_sk(sk)->addr);
+	if (!addr) {
 		sunaddr->sun_family = AF_UNIX;
 		sunaddr->sun_path[0] = 0;
 		err = sizeof(short);
 	} else {
-		struct unix_address *addr = u->addr;
-
 		err = addr->len;
 		memcpy(sunaddr, addr->name, addr->len);
 	}
-	unix_state_unlock(sk);
 	sock_put(sk);
 out:
 	return err;
@@ -2073,11 +2083,11 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
 
 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
 {
-	struct unix_sock *u = unix_sk(sk);
+	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
 
-	if (u->addr) {
-		msg->msg_namelen = u->addr->len;
-		memcpy(msg->msg_name, u->addr->name, u->addr->len);
+	if (addr) {
+		msg->msg_namelen = addr->len;
+		memcpy(msg->msg_name, addr->name, addr->len);
 	}
 }
 
@@ -2581,15 +2591,14 @@ static int unix_open_file(struct sock *sk)
 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-	unix_state_lock(sk);
+	if (!smp_load_acquire(&unix_sk(sk)->addr))
+		return -ENOENT;
+
 	path = unix_sk(sk)->path;
-	if (!path.dentry) {
-		unix_state_unlock(sk);
+	if (!path.dentry)
 		return -ENOENT;
-	}
 
 	path_get(&path);
-	unix_state_unlock(sk);
 
 	fd = get_unused_fd_flags(O_CLOEXEC);
 	if (fd < 0)
@@ -2830,7 +2839,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
 			sock_i_ino(s));
 
-		if (u->addr) {
+		if (u->addr) {	// under unix_table_lock here
 			int i, len;
 			seq_putc(seq, ' ');
 
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 384c84e83462..3183d9b8ab33 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -10,7 +10,8 @@
 
 static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
 {
-	struct unix_address *addr = unix_sk(sk)->addr;
+	/* might or might not have unix_table_lock */
+	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
 
 	if (!addr)
 		return 0;
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index f84001019356..33028c098ef3 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -321,6 +321,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 		if (a->u.net->sk) {
 			struct sock *sk = a->u.net->sk;
 			struct unix_sock *u;
+			struct unix_address *addr;
 			int len = 0;
 			char *p = NULL;
 
@@ -351,14 +352,15 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 #endif
 			case AF_UNIX:
 				u = unix_sk(sk);
+				addr = smp_load_acquire(&u->addr);
+				if (!addr)
+					break;
 				if (u->path.dentry) {
 					audit_log_d_path(ab, " path=", &u->path);
 					break;
 				}
-				if (!u->addr)
-					break;
-				len = u->addr->len-sizeof(short);
-				p = &u->addr->name->sun_path[0];
+				len = addr->len-sizeof(short);
+				p = &addr->name->sun_path[0];
 				audit_log_format(ab, " path=");
 				if (*p)
 					audit_log_untrustedstring(ab, p);

^ permalink raw reply related

* Re: [PATCH] NETWORKING: avoid use IPCB in cipso_v4_error
From: Paul Moore @ 2019-02-15 20:04 UTC (permalink / raw)
  To: David Miller; +Cc: s-nazarov, netdev, linux-security-module, kuznet, yoshfuji
In-Reply-To: <20190215.120009.1549205062473501080.davem@davemloft.net>

On Fri, Feb 15, 2019 at 3:00 PM David Miller <davem@davemloft.net> wrote:
> From: Paul Moore <paul@paul-moore.com>
> Date: Fri, 15 Feb 2019 14:02:31 -0500
>
> > On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
> >> From: Nazarov Sergey <s-nazarov@yandex.ru>
> >> Date: Tue, 12 Feb 2019 18:10:03 +0300
> >>
> >> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> >> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> >> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> >> > above IP layer.
> >> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> >> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
> >> >
> >> > The original discussion is here:
> >> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
> >> >
> >> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
> >>
> >> This problem is not unique to Cipso, net/atm/clip.c's error handler
> >> has the same exact issue.
> >>
> >> I didn't scan more of the tree, there are probably a couple more
> >> locations as well.
> >
> > David, are you happy with Sergey's solution as a fix for this?
> >
> > If so, would you prefer a respin of this patch to apply the to the
> > other broken callers (e.g. net/atm/clip.c), or would you rather merge
> > this patch and deal with the other callers in separate patches?
>
> I'd like the other broken callers to be handled.

Sergey, do you think you could fix the other callers too, or do you
want some help with that?

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: [PATCH] NETWORKING: avoid use IPCB in cipso_v4_error
From: David Miller @ 2019-02-15 20:00 UTC (permalink / raw)
  To: paul; +Cc: s-nazarov, netdev, linux-security-module, kuznet, yoshfuji
In-Reply-To: <CAHC9VhQK6Cmcx5kwYAPipR4YGyBHdkLEe3UH+VmNYGHu4PWqjQ@mail.gmail.com>

From: Paul Moore <paul@paul-moore.com>
Date: Fri, 15 Feb 2019 14:02:31 -0500

> On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
>> From: Nazarov Sergey <s-nazarov@yandex.ru>
>> Date: Tue, 12 Feb 2019 18:10:03 +0300
>>
>> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
>> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
>> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
>> > above IP layer.
>> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
>> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
>> >
>> > The original discussion is here:
>> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
>> >
>> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>>
>> This problem is not unique to Cipso, net/atm/clip.c's error handler
>> has the same exact issue.
>>
>> I didn't scan more of the tree, there are probably a couple more
>> locations as well.
> 
> David, are you happy with Sergey's solution as a fix for this?
> 
> If so, would you prefer a respin of this patch to apply the to the
> other broken callers (e.g. net/atm/clip.c), or would you rather merge
> this patch and deal with the other callers in separate patches?

I'd like the other broken callers to be handled.

^ permalink raw reply

* Re: [RESEND PATCH net] mm: page_alloc: fix ref bias in page_frag_alloc() for 1-byte allocs
From: David Miller @ 2019-02-15 19:58 UTC (permalink / raw)
  To: alexander.duyck; +Cc: jannh, netdev, linux-kernel
In-Reply-To: <CAKgT0UeOGPQO0viM0w3JD71357uj7uuZ+wX2tCUTP6u_ZQG78Q@mail.gmail.com>

From: Alexander Duyck <alexander.duyck@gmail.com>
Date: Fri, 15 Feb 2019 10:35:18 -0800

> I'll take care of it. I'm kind of annoyed that you resubmitted this to
> netdev before anyone had a chance to even provide review comments
> though.

He was only following instructions from Andrew Morton.

^ permalink raw reply

* Re: [PATCH net] net: ip6_gre: initialize erspan_ver just for erspan tunnels
From: Gregory Rose @ 2019-02-15 19:58 UTC (permalink / raw)
  To: Lorenzo Bianconi, davem; +Cc: netdev, petrm
In-Reply-To: <a08d2d1dbe3a498244867308adf37259bed30f37.1550239457.git.lorenzo.bianconi@redhat.com>


On 2/15/2019 6:10 AM, Lorenzo Bianconi wrote:
> After commit c706863bc890 ("net: ip6_gre: always reports o_key to
> userspace"), ip6gre and ip6gretap tunnels started reporting TUNNEL_KEY
> output flag even if it is not configured.
> ip6gre_fill_info checks erspan_ver value to add TUNNEL_KEY for
> erspan tunnels, however in commit 84581bdae9587 ("erspan: set
> erspan_ver to 1 by default when adding an erspan dev")
> erspan_ver is initialized to 1 even for ip6gre or ip6gretap
> Fix the issue moving erspan_ver initialization in a dedicated routine
>
> Fixes: c706863bc890 ("net: ip6_gre: always reports o_key to userspace")
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
>   net/ipv6/ip6_gre.c | 34 ++++++++++++++++++++--------------
>   1 file changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
> index 801a9a0c217e..43890898b0b5 100644
> --- a/net/ipv6/ip6_gre.c
> +++ b/net/ipv6/ip6_gre.c
> @@ -1719,6 +1719,24 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
>   	return 0;
>   }
>   
> +static void ip6erspan_set_version(struct nlattr *data[],
> +				  struct __ip6_tnl_parm *parms)
> +{
> +	parms->erspan_ver = 1;
> +	if (data[IFLA_GRE_ERSPAN_VER])
> +		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
> +
> +	if (parms->erspan_ver == 1) {
> +		if (data[IFLA_GRE_ERSPAN_INDEX])
> +			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
> +	} else if (parms->erspan_ver == 2) {
> +		if (data[IFLA_GRE_ERSPAN_DIR])
> +			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
> +		if (data[IFLA_GRE_ERSPAN_HWID])
> +			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
> +	}
> +}
> +
>   static void ip6gre_netlink_parms(struct nlattr *data[],
>   				struct __ip6_tnl_parm *parms)
>   {
> @@ -1767,20 +1785,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
>   
>   	if (data[IFLA_GRE_COLLECT_METADATA])
>   		parms->collect_md = true;
> -
> -	parms->erspan_ver = 1;
> -	if (data[IFLA_GRE_ERSPAN_VER])
> -		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
> -
> -	if (parms->erspan_ver == 1) {
> -		if (data[IFLA_GRE_ERSPAN_INDEX])
> -			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
> -	} else if (parms->erspan_ver == 2) {
> -		if (data[IFLA_GRE_ERSPAN_DIR])
> -			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
> -		if (data[IFLA_GRE_ERSPAN_HWID])
> -			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
> -	}
>   }
>   
>   static int ip6gre_tap_init(struct net_device *dev)
> @@ -2203,6 +2207,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
>   	int err;
>   
>   	ip6gre_netlink_parms(data, &nt->parms);
> +	ip6erspan_set_version(data, &nt->parms);
>   	ign = net_generic(net, ip6gre_net_id);
>   
>   	if (nt->parms.collect_md) {
> @@ -2248,6 +2253,7 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
>   	if (IS_ERR(t))
>   		return PTR_ERR(t);
>   
> +	ip6erspan_set_version(data, &p);
>   	ip6gre_tunnel_unlink_md(ign, t);
>   	ip6gre_tunnel_unlink(ign, t);
>   	ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);

LGTM.  Thanks Lorenzo.

Reviewed-by: Greg Rose <gvrose8192@gmail.com>

^ permalink raw reply

* Re: Three questions about busy poll
From: Cong Wang @ 2019-02-15 19:18 UTC (permalink / raw)
  To: Samudrala, Sridhar
  Cc: Alexander Duyck, Eric Dumazet, Linux Kernel Network Developers
In-Reply-To: <bfaff1c6-1269-1c33-31e1-0a78f78d7214@intel.com>

On Thu, Feb 14, 2019 at 4:43 PM Samudrala, Sridhar
<sridhar.samudrala@intel.com> wrote:
>
>
> On 2/14/2019 12:15 PM, Cong Wang wrote:
>
> Hello,
>
> While looking into the busy polling in Linux kernel, three questions
> come into my mind:
>
> 1. In the document[1], it claims sysctl.net.busy_poll depends on
> either SO_BUSY_POLL or sysctl.net.busy_read. However, from the code in
> ep_set_busy_poll_napi_id(), I don't see such a dependency. It simply
> checks sysctl_net_busy_poll and sk->sk_napi_id, but sk->sk_napi_id is
> always set as long as we enable CONFIG_NET_RX_BUSY_POLL. So what I am
> missing here?
>
> epoll based busypoll is only based on global sysctl_net_busy_poll.
> busy_poll value is used with poll()/select()/epoll and and busy_read is used
> with socket recvmsg

Right, I was confused by what the term "poll" refers to.


>
> 2. Why there is no socket option for sysctl.net.busy_poll? Clearly
> sysctl_net_busy_poll is global and SO_BUSY_POLL only works for
> sysctl.net.busy_read.
>
>
> Not sure if it is useful to make it a per socket option. I think it could
> be a per poll/epoll fd option


It is useful, however, from Willem's reply it looks like hard to push it
down from epoll() interface to each socket.

>
> 3. How is SO_INCOMING_NAPI_ID supposed to be used? I can't find any
> useful documents online. Any example or more detailed doc?
>
>
> A app can create one worker thread per device queue and a worker thread
> for an incoming connection can be selected based on SO_INCOMING_NAPI_ID so that
> all connections coming on a queue are processed by the same thread. This will
> allow epoll from a thread to be associated with sockets that receive packets
> from a single queue allowing busy polling.
>

This information is very useful. It also requires each thread pinning to each
CPU/RX queue, right?

Anyway, I will add this information to socket.7 man page.

Thanks!

^ permalink raw reply

* Re: Three questions about busy poll
From: Cong Wang @ 2019-02-15 19:04 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Alexander Duyck, Eric Dumazet, sridhar.samudrala,
	Linux Kernel Network Developers
In-Reply-To: <CA+FuTSdEhZ3GdODpvdkw8_icnTrawf45WNNtohHCkje4TpqD4g@mail.gmail.com>

On Thu, Feb 14, 2019 at 4:39 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Thu, Feb 14, 2019 at 3:15 PM Cong Wang <xiyou.wangcong@gmail.com> wrote:
> >
> > Hello,
> >
> > While looking into the busy polling in Linux kernel, three questions
> > come into my mind:
> >
> > 1. In the document[1], it claims sysctl.net.busy_poll depends on
> > either SO_BUSY_POLL or sysctl.net.busy_read. However, from the code in
> > ep_set_busy_poll_napi_id(), I don't see such a dependency. It simply
> > checks sysctl_net_busy_poll and sk->sk_napi_id, but sk->sk_napi_id is
> > always set as long as we enable CONFIG_NET_RX_BUSY_POLL. So what I am
> > missing here?
>
> That documentation refers to sock_poll. This does call sk_busy_loop
> individually on each socket in the pollset and thus respects those values.
> Epoll was added later, after both sock_poll and that documentation.


Ah, yeah, this explains my confusion. I thought busy_poll refers to
all polling related syscalls, that is select()/poll()/epoll(), it looks like
epoll() is so special here. Probably we need some clarification in
net.txt.


>
> > 2. Why there is no socket option for sysctl.net.busy_poll? Clearly
> > sysctl_net_busy_poll is global and SO_BUSY_POLL only works for
> > sysctl.net.busy_read.
>
> I guess because of how sock_poll works. In that case it is not needed.
> The poll duration applies more to the pollset than any of the
> individual sockets, too.


Good point, it's probably like struct eventpoll vs. struct epitem.

The reason why I am looking for a per-socket tuning is to minimize
the impact of setting busy_poll. I don't know if it is possible to somehow
make this per-socket via epoll interfaces, perhaps fundamentally
it is impossible?


>
> > 3. How is SO_INCOMING_NAPI_ID supposed to be used? I can't find any
> > useful documents online. Any example or more detailed doc?
>
> From the commit message of 6d4339028b35 ("net: Introduce
> SO_INCOMING_NAPI_ID") it sounds like a sharding mechanism that
> maintains flow affinity by sharding based on rxqueue (assuming that
> something like RSS was used to ensure flow affinity in the first
> place).

That commit message is the only thing I can find too. I kinda
need a formal documentation in man page and hopefully
an example too.

Thanks for your explanations!

^ permalink raw reply

* Re: [PATCH] NETWORKING: avoid use IPCB in cipso_v4_error
From: Paul Moore @ 2019-02-15 19:02 UTC (permalink / raw)
  To: David Miller; +Cc: s-nazarov, netdev, linux-security-module, kuznet, yoshfuji
In-Reply-To: <20190214.084343.1138362153341500718.davem@davemloft.net>

On Thu, Feb 14, 2019 at 11:43 AM David Miller <davem@davemloft.net> wrote:
> From: Nazarov Sergey <s-nazarov@yandex.ru>
> Date: Tue, 12 Feb 2019 18:10:03 +0300
>
> > Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there.
> > icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data.
> > But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used
> > above IP layer.
> > This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with
> > introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
> >
> > The original discussion is here:
> > https://lore.kernel.org/linux-security-module/16659801547571984@sas1-890ba5c2334a.qloud-c.yandex.net/
> >
> > Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
>
> This problem is not unique to Cipso, net/atm/clip.c's error handler
> has the same exact issue.
>
> I didn't scan more of the tree, there are probably a couple more
> locations as well.

David, are you happy with Sergey's solution as a fix for this?

If so, would you prefer a respin of this patch to apply the to the
other broken callers (e.g. net/atm/clip.c), or would you rather merge
this patch and deal with the other callers in separate patches?

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: [PATCH v2 perf,bpf 08/11] perf, bpf: save btf information as headers to perf.data
From: Song Liu @ 2019-02-15 18:59 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Jiri Olsa, Stephane Eranian, Netdev, linux-kernel,
	Alexei Starovoitov, daniel@iogearbox.net, Kernel Team,
	peterz@infradead.org, namhyung@kernel.org
In-Reply-To: <20190215182047.GL31177@kernel.org>



> On Feb 15, 2019, at 10:20 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> 
> Em Fri, Feb 15, 2019 at 05:47:58PM +0000, Song Liu escreveu:
>> 
>> 
>>> On Feb 15, 2019, at 9:40 AM, Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
>>> 
>>> Em Fri, Feb 15, 2019 at 05:25:01PM +0000, Song Liu escreveu:
>>>>> On Feb 15, 2019, at 6:26 AM, Arnaldo Carvalho de Melo <acme@redhat.com> wrote:
>>>>> Em Thu, Feb 14, 2019 at 04:00:09PM -0800, Song Liu escreveu:
>>>>>> This patch enables perf-record to save btf information as headers to
>>>>>> perf.data A new header type HEADER_BTF is introduced for this data.
>>> 
>>>>> 	Wouldn't it be better for this HEADER_BTF to be introduced
>>>>> already as an user space event, Song, see:
>>> 
>>>>> tools/perf/util/event.h
>>> 
>>>>> and:
>>> 
>>>>> tools/perf/util/event.c
>>> 
>>>>> perf_event__synthesize_cpu_map()
>>> 
>>>> BTF would be short living for short living BPF programs. I guess 
>>>> saving them as header is easier than merging them with samples. 
>>> 
>>>> What's the benefit of saving them as user space events?
>>> 
>>> When we work with pipe mode, i.e.:
>>> 
>>> 	perf record -o - | perf report -i -
>>> 
>>> and other combinations (with 'perf script', 'perf inject', etc), we need
>>> a way to pass the headers to the other side, and the way was via user
>>> space events.
>>> 
>>> This is something Stephane and Jiri have been discussing recently,
>>> probably they have more justifications, Stephane, Jiri?
>>> 
>>> - Arnaldo
>> 
>> I see. In this case, we will need some synchronization between main
>> thread and the polling thread, as they are both writing to the same
>> pipe. 
> 
> So, the whole context is that we need to have 'perf record' to start a
> thread per CPU and then read the already per-cpu mmap buffers in the
> matching thread, with the right affinity, numa settings to have the
> record phase not cause contention, etc, so it ends up dumping one stream
> per CPU in a separate file in a 'perf.data' directory instead of a
> perf.data file.
> 
> Jiri is working on that, so, if you dump one more stream into that
> directory, it would, at post processing time, be ordered together with
> the other stream, the per-cpu ones.
> 
> - Arnaldo

I see. This solution looks great. 

For this set, how about I keep this part as-is (at least for v3)? In 
this case, it will goes to the header file after Jiri's change. Once 
Jiri's work is done, I will move them into per-cpu files. 

Thanks,
Song


^ permalink raw reply

* Re: [PATCH net-next 1/2] net: phy: improve phy_resolve_aneg_linkmode
From: Heiner Kallweit @ 2019-02-15 18:57 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <20190215135714.GI5699@lunn.ch>

On 15.02.2019 14:57, Andrew Lunn wrote:
> On Thu, Feb 14, 2019 at 10:15:31PM +0100, Heiner Kallweit wrote:
>> We have the settings array of modes which is sorted based on aneg
>> priority. Instead of checking each mode manually let's simply iterate
>> over the sorted settings.
>>
>> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
>> ---
>>  drivers/net/phy/phy-core.c | 43 +++++++-------------------------------
>>  1 file changed, 7 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
>> index cdea028d1..5d43106fe 100644
>> --- a/drivers/net/phy/phy-core.c
>> +++ b/drivers/net/phy/phy-core.c
>> @@ -349,45 +349,16 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
>>  void phy_resolve_aneg_linkmode(struct phy_device *phydev)
>>  {
>>  	__ETHTOOL_DECLARE_LINK_MODE_MASK(common);
>> +	int i;
>>  
>>  	linkmode_and(common, phydev->lp_advertising, phydev->advertising);
>>  
>> -	if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, common)) {
>> -		phydev->speed = SPEED_10000;
>> -		phydev->duplex = DUPLEX_FULL;
>> -	} else if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
>> -				     common)) {
>> -		phydev->speed = SPEED_5000;
>> -		phydev->duplex = DUPLEX_FULL;
>> -	} else if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
>> -				     common)) {
>> -		phydev->speed = SPEED_2500;
>> -		phydev->duplex = DUPLEX_FULL;
>> -	} else if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
>> -				     common)) {
>> -		phydev->speed = SPEED_1000;
>> -		phydev->duplex = DUPLEX_FULL;
>> -	} else if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
>> -				     common)) {
>> -		phydev->speed = SPEED_1000;
>> -		phydev->duplex = DUPLEX_HALF;
>> -	} else if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
>> -				     common)) {
>> -		phydev->speed = SPEED_100;
>> -		phydev->duplex = DUPLEX_FULL;
>> -	} else if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
>> -				     common)) {
>> -		phydev->speed = SPEED_100;
>> -		phydev->duplex = DUPLEX_HALF;
>> -	} else if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
>> -				     common)) {
>> -		phydev->speed = SPEED_10;
>> -		phydev->duplex = DUPLEX_FULL;
>> -	} else if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
>> -				     common)) {
>> -		phydev->speed = SPEED_10;
>> -		phydev->duplex = DUPLEX_HALF;
>> -	}
>> +	for (i = 0; i < ARRAY_SIZE(settings); i++)
>> +		if (test_bit(settings[i].bit, common)) {
>> +			phydev->speed = settings[i].speed;
>> +			phydev->duplex = settings[i].duplex;
>> +			break;
>> +		}
> 
> Hi Heiner
> 
> Nice simplification.
> 
> I just have one thought about this. The original code was limited to
> baseT. The new code could in theory return a non BaseT speed. For that
> to happen, it would require that phydev->lp_advertising and
> phydev->advertising contain a non BaseT link mode? Is that possible?
> I don't think it is.
> 
Currently we set only BaseT modes because that's what the clause 45
standard registers offer. However drivers may come that use vendor
registers for e.g. backplane auto-negotiation (IIRC clause 73).

Now it's even better because this function shouldn't be (and doesn't
have to be) limited to a specific physical link type.

>   Andrew
> 
Heiner


^ permalink raw reply

* Re: [PATCH RESEND net] net: phy: xgmiitorgmii: Support generic PHY status read
From: Florian Fainelli @ 2019-02-15 18:53 UTC (permalink / raw)
  To: Paul Kocialkowski, netdev, linux-arm-kernel, linux-kernel
  Cc: Andrew Lunn, Heiner Kallweit, David S . Miller, Michal Simek,
	Thomas Petazzoni
In-Reply-To: <38f6708476e9beca4583ccc2a62e238a4981b735.camel@bootlin.com>

On 2/15/19 10:34 AM, Paul Kocialkowski wrote:
> Hi,
> 
> On Fri, 2019-02-15 at 09:38 -0800, Florian Fainelli wrote:
>> On 2/15/19 8:32 AM, Paul Kocialkowski wrote:
>>> Some PHY drivers like the generic one do not provide a read_status
>>> callback on their own but rely on genphy_read_status being called
>>> directly.
>>>
>>> With the current code, this results in a NULL function pointer call.
>>> Call genphy_read_status instead when there is no specific callback.
>>>
>>> Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support")
>>> Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
>>> ---
>>> Added Fixes tag and net label for resend.
>>
>> You would want to use phy_read_status() which encapsulates that check as
>> well as checks that the phy_drv pointer is not NULL.
> 
> Well, this driver is a bit different and our priv->phy_drv != phydev-
>> drv, so we can't use the helper directly. I should probably have
> mentionned it in the commit message, sorry!
> 
> As I was mentionning to Andrew in the initial submission of this patch,
> this driver is a bit unusual since it represents a GMII to RGMII
> bridge, so it's not actually a PHY driver on its own -- it just sticks
> itself in between the actual PHY and the MAC.

Yes, my bad, you should still consider checking priv->phy_drv though, if
someone unbinds the PHY driver on either side, you are toast.

> 
> Cheers and thanks for the review,
> 
> Paul
> 
>>>  drivers/net/phy/xilinx_gmii2rgmii.c | 5 ++++-
>>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
>>> index 74a8782313cf..bd6084e315de 100644
>>> --- a/drivers/net/phy/xilinx_gmii2rgmii.c
>>> +++ b/drivers/net/phy/xilinx_gmii2rgmii.c
>>> @@ -44,7 +44,10 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev)
>>>  	u16 val = 0;
>>>  	int err;
>>>  
>>> -	err = priv->phy_drv->read_status(phydev);
>>> +	if (priv->phy_drv->read_status)
>>> +		err = priv->phy_drv->read_status(phydev);
>>> +	else
>>> +		err = genphy_read_status(phydev);
>>>  	if (err < 0)
>>>  		return err;
>>>  
>>>
>>
>>


-- 
Florian

^ permalink raw reply

* Re: [RESEND PATCH net] mm: page_alloc: fix ref bias in page_frag_alloc() for 1-byte allocs
From: Jann Horn @ 2019-02-15 18:53 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: Network Development, kernel list
In-Reply-To: <CAKgT0UeOGPQO0viM0w3JD71357uj7uuZ+wX2tCUTP6u_ZQG78Q@mail.gmail.com>

On Fri, Feb 15, 2019 at 7:35 PM Alexander Duyck
<alexander.duyck@gmail.com> wrote:
> On Fri, Feb 15, 2019 at 6:10 AM Jann Horn <jannh@google.com> wrote:
> > On Thu, Feb 14, 2019 at 11:21 PM David Miller <davem@davemloft.net> wrote:
> > > From: Jann Horn <jannh@google.com>
> > > Date: Thu, 14 Feb 2019 22:26:22 +0100
> > >
> > > > On Thu, Feb 14, 2019 at 6:13 PM David Miller <davem@davemloft.net> wrote:
> > > >> From: Jann Horn <jannh@google.com>
> > > >> Date: Wed, 13 Feb 2019 22:45:59 +0100
> > > >>
> > > >> > The basic idea behind ->pagecnt_bias is: If we pre-allocate the maximum
> > > >> > number of references that we might need to create in the fastpath later,
> > > >> > the bump-allocation fastpath only has to modify the non-atomic bias value
> > > >> > that tracks the number of extra references we hold instead of the atomic
> > > >> > refcount. The maximum number of allocations we can serve (under the
> > > >> > assumption that no allocation is made with size 0) is nc->size, so that's
> > > >> > the bias used.
> > > >> >
> > > >> > However, even when all memory in the allocation has been given away, a
> > > >> > reference to the page is still held; and in the `offset < 0` slowpath, the
> > > >> > page may be reused if everyone else has dropped their references.
> > > >> > This means that the necessary number of references is actually
> > > >> > `nc->size+1`.
> > > >> >
> > > >> > Luckily, from a quick grep, it looks like the only path that can call
> > > >> > page_frag_alloc(fragsz=1) is TAP with the IFF_NAPI_FRAGS flag, which
> > > >> > requires CAP_NET_ADMIN in the init namespace and is only intended to be
> > > >> > used for kernel testing and fuzzing.
> > > >> >
> > > >> > To test for this issue, put a `WARN_ON(page_ref_count(page) == 0)` in the
> > > >> > `offset < 0` path, below the virt_to_page() call, and then repeatedly call
> > > >> > writev() on a TAP device with IFF_TAP|IFF_NO_PI|IFF_NAPI_FRAGS|IFF_NAPI,
> > > >> > with a vector consisting of 15 elements containing 1 byte each.
> > > >> >
> > > >> > Signed-off-by: Jann Horn <jannh@google.com>
> > > >>
> > > >> Applied and queued up for -stable.
> > > >
> > > > I had sent a v2 at Alexander Duyck's request an hour before you
> > > > applied the patch (with a minor difference that, in Alexander's
> > > > opinion, might be slightly more efficient). I guess the net tree
> > > > doesn't work like the mm tree, where patches can get removed and
> > > > replaced with newer versions? So if Alexander wants that change
> > > > (s/size/PAGE_FRAG_CACHE_MAX_SIZE/ in the refcount), someone has to
> > > > send that as a separate patch?
> > >
> > > Yes, please send a follow-up.  Sorry about that.
> >
> > @Alexander Do you want to do that? It was your idea and I don't think
> > I can reasonably judge the usefulness of the change.
>
> I'll take care of it. I'm kind of annoyed that you resubmitted this to
> netdev before anyone had a chance to even provide review comments
> though.

Ah, I wasn't aware that there's a convention against resubmitting a
patch immediately if the first recipient list pointed to the wrong
maintainer. The only recipient I removed for the resend was akpm, so I
assumed that anyone wanting to comment on the patch could just as well
do that on the resent patch. Also, akpm had already put it in his
tree, and I wasn't sure whether that meant that I had to send it to
davem quickly enough to make it land in davem's tree before akpm sends
his next pull request to Linus...

So next time this happens, should I add a note below the resend
pointing out that the resend is of a recent patch and it hasn't had
time to be reviewed yet, or something like that?

^ permalink raw reply

* Re: [net-next 5/5] net: sock: remove the definition of SOCK_DEBUG()
From: Joe Perches @ 2019-02-15 18:51 UTC (permalink / raw)
  To: Eric Dumazet, Cong Wang
  Cc: Yafang Shao, David Miller, Daniel Borkmann, netdev, shaoyafang
In-Reply-To: <CANn89iKgLBnC6wwDdUmM5XmgnODiL+sdHumg__jVC6FN_kKDEg@mail.gmail.com>

On Fri, 2019-02-15 at 10:22 -0800, Eric Dumazet wrote:
> On Fri, Feb 15, 2019 at 10:13 AM Cong Wang <xiyou.wangcong@gmail.com> wrote:
> > On Fri, Feb 15, 2019 at 8:26 AM Eric Dumazet <edumazet@google.com> wrote:
> > > On Fri, Feb 15, 2019 at 6:50 AM Yafang Shao <laoar.shao@gmail.com> wrote:
> > > > As SOCK_DEBUG() isn't used any more, we can get ride of it now.
> > > > 
> > > 
> > > No, we are still using this infrastructure from time to time.
> > > 
> > > I told you I agreed to remove the current (obsolete) TCP call sites,
> > >  I never suggested to remove SOCK_DEBUG() completely.
> > 
> > Since when do we upstream care about any out-of-tree users?
> > 
> > You can always carry a patch to keep it downstream if you want,
> > no one can stop you doing it.
> 
> Somehow the patch series seems to present things in this way :
> 
> Eric Dumazet suggested to remove completely the SOCK_DEBUG() interface.

Well, you kinda did.
It's certainly reasonable to interpret what you wrote as such.

On Tue, 2019-02-12 at 18:15 -0800, Eric Dumazet wrote:
> Just remove all SOCK_DEBUG() calls, there are leftovers of very ancient times.

My suggestion would be to undefine SOCK_DEBUGGING.

Something like:
---
 include/net/sock.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 328cb7cb7b0b..7e39bdfa342a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -81,14 +81,17 @@
  */
 
 /* Define this to get the SOCK_DBG debugging facility. */
-#define SOCK_DEBUGGING
+/* #define SOCK_DEBUGGING */
 #ifdef SOCK_DEBUGGING
-#define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \
-					printk(KERN_DEBUG msg); } while (0)
+#define SOCK_DEBUG(sk, fmt, ...)				\
+do {								\
+	if ((sk) && sock_flag((sk), SOCK_DBG))			\
+		printk(KERN_DEBUG fmt, ##__VA_ARGS__);		\
+} while (0)
 #else
 /* Validate arguments and do nothing */
-static inline __printf(2, 3)
-void SOCK_DEBUG(const struct sock *sk, const char *msg, ...)
+__printf(2, 3)
+static inline void SOCK_DEBUG(const struct sock *sk, const char *fmt, ...)
 {
 }
 #endif



^ permalink raw reply related

* Re: [RESEND PATCH net] mm: page_alloc: fix ref bias in page_frag_alloc() for 1-byte allocs
From: Alexander Duyck @ 2019-02-15 18:35 UTC (permalink / raw)
  To: Jann Horn; +Cc: Network Development, kernel list
In-Reply-To: <CAG48ez0bFsKPBubARtjs+NQmRiUAbPEGbNjXPJaceuDa9Dd7OA@mail.gmail.com>

On Fri, Feb 15, 2019 at 6:10 AM Jann Horn <jannh@google.com> wrote:
>
> On Thu, Feb 14, 2019 at 11:21 PM David Miller <davem@davemloft.net> wrote:
> >
> > From: Jann Horn <jannh@google.com>
> > Date: Thu, 14 Feb 2019 22:26:22 +0100
> >
> > > On Thu, Feb 14, 2019 at 6:13 PM David Miller <davem@davemloft.net> wrote:
> > >>
> > >> From: Jann Horn <jannh@google.com>
> > >> Date: Wed, 13 Feb 2019 22:45:59 +0100
> > >>
> > >> > The basic idea behind ->pagecnt_bias is: If we pre-allocate the maximum
> > >> > number of references that we might need to create in the fastpath later,
> > >> > the bump-allocation fastpath only has to modify the non-atomic bias value
> > >> > that tracks the number of extra references we hold instead of the atomic
> > >> > refcount. The maximum number of allocations we can serve (under the
> > >> > assumption that no allocation is made with size 0) is nc->size, so that's
> > >> > the bias used.
> > >> >
> > >> > However, even when all memory in the allocation has been given away, a
> > >> > reference to the page is still held; and in the `offset < 0` slowpath, the
> > >> > page may be reused if everyone else has dropped their references.
> > >> > This means that the necessary number of references is actually
> > >> > `nc->size+1`.
> > >> >
> > >> > Luckily, from a quick grep, it looks like the only path that can call
> > >> > page_frag_alloc(fragsz=1) is TAP with the IFF_NAPI_FRAGS flag, which
> > >> > requires CAP_NET_ADMIN in the init namespace and is only intended to be
> > >> > used for kernel testing and fuzzing.
> > >> >
> > >> > To test for this issue, put a `WARN_ON(page_ref_count(page) == 0)` in the
> > >> > `offset < 0` path, below the virt_to_page() call, and then repeatedly call
> > >> > writev() on a TAP device with IFF_TAP|IFF_NO_PI|IFF_NAPI_FRAGS|IFF_NAPI,
> > >> > with a vector consisting of 15 elements containing 1 byte each.
> > >> >
> > >> > Signed-off-by: Jann Horn <jannh@google.com>
> > >>
> > >> Applied and queued up for -stable.
> > >
> > > I had sent a v2 at Alexander Duyck's request an hour before you
> > > applied the patch (with a minor difference that, in Alexander's
> > > opinion, might be slightly more efficient). I guess the net tree
> > > doesn't work like the mm tree, where patches can get removed and
> > > replaced with newer versions? So if Alexander wants that change
> > > (s/size/PAGE_FRAG_CACHE_MAX_SIZE/ in the refcount), someone has to
> > > send that as a separate patch?
> >
> > Yes, please send a follow-up.  Sorry about that.
>
> @Alexander Do you want to do that? It was your idea and I don't think
> I can reasonably judge the usefulness of the change.

I'll take care of it. I'm kind of annoyed that you resubmitted this to
netdev before anyone had a chance to even provide review comments
though.

As is this doesn't really address the issue anyway since the bigger
issue is the data alignment issue that I pointed out. I'll have
patches for both ready shortly.

- Alex

^ permalink raw reply

* Re: [PATCH RESEND net] net: phy: xgmiitorgmii: Support generic PHY status read
From: Paul Kocialkowski @ 2019-02-15 18:34 UTC (permalink / raw)
  To: Florian Fainelli, netdev, linux-arm-kernel, linux-kernel
  Cc: Andrew Lunn, Heiner Kallweit, David S . Miller, Michal Simek,
	Thomas Petazzoni
In-Reply-To: <387ed483-b205-beda-319d-6f2b8ea55601@gmail.com>

Hi,

On Fri, 2019-02-15 at 09:38 -0800, Florian Fainelli wrote:
> On 2/15/19 8:32 AM, Paul Kocialkowski wrote:
> > Some PHY drivers like the generic one do not provide a read_status
> > callback on their own but rely on genphy_read_status being called
> > directly.
> > 
> > With the current code, this results in a NULL function pointer call.
> > Call genphy_read_status instead when there is no specific callback.
> > 
> > Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support")
> > Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
> > ---
> > Added Fixes tag and net label for resend.
> 
> You would want to use phy_read_status() which encapsulates that check as
> well as checks that the phy_drv pointer is not NULL.

Well, this driver is a bit different and our priv->phy_drv != phydev-
>drv, so we can't use the helper directly. I should probably have
mentionned it in the commit message, sorry!

As I was mentionning to Andrew in the initial submission of this patch,
this driver is a bit unusual since it represents a GMII to RGMII
bridge, so it's not actually a PHY driver on its own -- it just sticks
itself in between the actual PHY and the MAC.

Cheers and thanks for the review,

Paul

> >  drivers/net/phy/xilinx_gmii2rgmii.c | 5 ++++-
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
> > index 74a8782313cf..bd6084e315de 100644
> > --- a/drivers/net/phy/xilinx_gmii2rgmii.c
> > +++ b/drivers/net/phy/xilinx_gmii2rgmii.c
> > @@ -44,7 +44,10 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev)
> >  	u16 val = 0;
> >  	int err;
> >  
> > -	err = priv->phy_drv->read_status(phydev);
> > +	if (priv->phy_drv->read_status)
> > +		err = priv->phy_drv->read_status(phydev);
> > +	else
> > +		err = genphy_read_status(phydev);
> >  	if (err < 0)
> >  		return err;
> >  
> > 
> 
> 
-- 
Paul Kocialkowski, Bootlin
Embedded Linux and kernel engineering
https://bootlin.com


^ permalink raw reply

* Re: [PATCH V2 0/7] Add FOLL_LONGTERM to GUP fast and use it
From: Ira Weiny @ 2019-02-15 18:29 UTC (permalink / raw)
  To: linux-mips, linux-kernel, kvm-ppc, linuxppc-dev, linux-s390,
	linux-sh, sparclinux, kvm, linux-fpga, dri-devel, linux-rdma,
	linux-media, linux-scsi, devel, virtualization, netdev,
	linux-fbdev, xen-devel, devel, linux-mm, ceph-devel, rds-devel

> NOTE: This series depends on my clean up patch to remove the write parameter
> from gup_fast_permitted()[1]
> 
> HFI1, qib, and mthca, use get_user_pages_fast() due to it performance
> advantages.  These pages can be held for a significant time.  But
> get_user_pages_fast() does not protect against mapping of FS DAX pages.
> 
> Introduce FOLL_LONGTERM and use this flag in get_user_pages_fast() which
> retains the performance while also adding the FS DAX checks.  XDP has also
> shown interest in using this functionality.[2]
> 
> In addition we change get_user_pages() to use the new FOLL_LONGTERM flag and
> remove the specialized get_user_pages_longterm call.
> 
> [1] https://lkml.org/lkml/2019/2/11/237
> [2] https://lkml.org/lkml/2019/2/11/1789

Any comments on this series?  I've touched a lot of subsystems which I think
require review.

Thanks,
Ira

> 
> Ira Weiny (7):
>   mm/gup: Replace get_user_pages_longterm() with FOLL_LONGTERM
>   mm/gup: Change write parameter to flags in fast walk
>   mm/gup: Change GUP fast to use flags rather than a write 'bool'
>   mm/gup: Add FOLL_LONGTERM capability to GUP fast
>   IB/hfi1: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
>   IB/qib: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
>   IB/mthca: Use the new FOLL_LONGTERM flag to get_user_pages_fast()
> 
>  arch/mips/mm/gup.c                          |  11 +-
>  arch/powerpc/kvm/book3s_64_mmu_hv.c         |   4 +-
>  arch/powerpc/kvm/e500_mmu.c                 |   2 +-
>  arch/powerpc/mm/mmu_context_iommu.c         |   4 +-
>  arch/s390/kvm/interrupt.c                   |   2 +-
>  arch/s390/mm/gup.c                          |  12 +-
>  arch/sh/mm/gup.c                            |  11 +-
>  arch/sparc/mm/gup.c                         |   9 +-
>  arch/x86/kvm/paging_tmpl.h                  |   2 +-
>  arch/x86/kvm/svm.c                          |   2 +-
>  drivers/fpga/dfl-afu-dma-region.c           |   2 +-
>  drivers/gpu/drm/via/via_dmablit.c           |   3 +-
>  drivers/infiniband/core/umem.c              |   5 +-
>  drivers/infiniband/hw/hfi1/user_pages.c     |   5 +-
>  drivers/infiniband/hw/mthca/mthca_memfree.c |   3 +-
>  drivers/infiniband/hw/qib/qib_user_pages.c  |   8 +-
>  drivers/infiniband/hw/qib/qib_user_sdma.c   |   2 +-
>  drivers/infiniband/hw/usnic/usnic_uiom.c    |   9 +-
>  drivers/media/v4l2-core/videobuf-dma-sg.c   |   6 +-
>  drivers/misc/genwqe/card_utils.c            |   2 +-
>  drivers/misc/vmw_vmci/vmci_host.c           |   2 +-
>  drivers/misc/vmw_vmci/vmci_queue_pair.c     |   6 +-
>  drivers/platform/goldfish/goldfish_pipe.c   |   3 +-
>  drivers/rapidio/devices/rio_mport_cdev.c    |   4 +-
>  drivers/sbus/char/oradax.c                  |   2 +-
>  drivers/scsi/st.c                           |   3 +-
>  drivers/staging/gasket/gasket_page_table.c  |   4 +-
>  drivers/tee/tee_shm.c                       |   2 +-
>  drivers/vfio/vfio_iommu_spapr_tce.c         |   3 +-
>  drivers/vfio/vfio_iommu_type1.c             |   3 +-
>  drivers/vhost/vhost.c                       |   2 +-
>  drivers/video/fbdev/pvr2fb.c                |   2 +-
>  drivers/virt/fsl_hypervisor.c               |   2 +-
>  drivers/xen/gntdev.c                        |   2 +-
>  fs/orangefs/orangefs-bufmap.c               |   2 +-
>  include/linux/mm.h                          |  17 +-
>  kernel/futex.c                              |   2 +-
>  lib/iov_iter.c                              |   7 +-
>  mm/gup.c                                    | 220 ++++++++++++--------
>  mm/gup_benchmark.c                          |   5 +-
>  mm/util.c                                   |   8 +-
>  net/ceph/pagevec.c                          |   2 +-
>  net/rds/info.c                              |   2 +-
>  net/rds/rdma.c                              |   3 +-
>  44 files changed, 232 insertions(+), 180 deletions(-)
> 
> -- 
> 2.20.1
> 

^ permalink raw reply

* [PATCH net-next 6/7] s390/qeth: add support for ETHTOOL_GRINGPARAM
From: Julian Wiedmann @ 2019-02-15 18:22 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20190215182231.90709-1-jwi@linux.ibm.com>

Implement a trivial callback that exposes the queue sizes.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/qeth_ethtool.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c
index 9ce5ef5fcf7a..93a53fed4cf8 100644
--- a/drivers/s390/net/qeth_ethtool.c
+++ b/drivers/s390/net/qeth_ethtool.c
@@ -102,6 +102,22 @@ static void qeth_get_ethtool_stats(struct net_device *dev,
 				   txq_stats, TXQ_STATS_LEN);
 }
 
+static void qeth_get_ringparam(struct net_device *dev,
+			       struct ethtool_ringparam *param)
+{
+	struct qeth_card *card = dev->ml_priv;
+
+	param->rx_max_pending = QDIO_MAX_BUFFERS_PER_Q;
+	param->rx_mini_max_pending = 0;
+	param->rx_jumbo_max_pending = 0;
+	param->tx_max_pending = QDIO_MAX_BUFFERS_PER_Q;
+
+	param->rx_pending = card->qdio.in_buf_pool.buf_count;
+	param->rx_mini_pending = 0;
+	param->rx_jumbo_pending = 0;
+	param->tx_pending = QDIO_MAX_BUFFERS_PER_Q;
+}
+
 static void qeth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct qeth_card *card = dev->ml_priv;
@@ -338,6 +354,7 @@ static int qeth_get_link_ksettings(struct net_device *netdev,
 
 const struct ethtool_ops qeth_ethtool_ops = {
 	.get_link = ethtool_op_get_link,
+	.get_ringparam = qeth_get_ringparam,
 	.get_strings = qeth_get_strings,
 	.get_ethtool_stats = qeth_get_ethtool_stats,
 	.get_sset_count = qeth_get_sset_count,
-- 
2.16.4


^ permalink raw reply related

* [PATCH net-next 5/7] s390/qeth: overhaul ethtool statistics
From: Julian Wiedmann @ 2019-02-15 18:22 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20190215182231.90709-1-jwi@linux.ibm.com>

Accumulate per-TX queue statistics, and increase their size to 64 bit.
Don't bother with enabling/disabling the statistics, the overhead is
negligible.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/qeth_core.h      |  90 ++++++++++++----------
 drivers/s390/net/qeth_core_main.c | 127 ++++++++++++++++---------------
 drivers/s390/net/qeth_core_sys.c  |  39 +++++-----
 drivers/s390/net/qeth_ethtool.c   | 152 +++++++++++++++++++++-----------------
 drivers/s390/net/qeth_l2_main.c   |  28 +++----
 drivers/s390/net/qeth_l3_main.c   |  36 ++++-----
 6 files changed, 256 insertions(+), 216 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 5e08f112db13..c0c46be0b251 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -34,6 +34,8 @@
 #include <asm/ccwgroup.h>
 #include <asm/sysinfo.h>
 
+#include <uapi/linux/if_link.h>
+
 #include "qeth_core_mpc.h"
 
 /**
@@ -112,35 +114,6 @@ static inline u32 qeth_get_device_id(struct ccw_device *cdev)
 #define CCW_DEVID(cdev)		(qeth_get_device_id(cdev))
 #define CARD_DEVID(card)	(CCW_DEVID(CARD_RDEV(card)))
 
-/**
- * card stuff
- */
-struct qeth_perf_stats {
-	unsigned int bufs_rec;
-	unsigned int bufs_sent;
-	unsigned int buf_elements_sent;
-
-	unsigned int skbs_sent_pack;
-	unsigned int bufs_sent_pack;
-
-	unsigned int sc_dp_p;
-	unsigned int sc_p_dp;
-	unsigned int large_send_bytes;
-	unsigned int large_send_cnt;
-	unsigned int sg_skbs_sent;
-	/* initial values when measuring starts */
-	unsigned long initial_rx_packets;
-	unsigned long initial_tx_packets;
-	/* inbound scatter gather data */
-	unsigned int sg_skbs_rx;
-	unsigned int sg_frags_rx;
-	unsigned int sg_alloc_page_rx;
-	unsigned int tx_csum;
-	unsigned int tx_lin;
-	unsigned int tx_linfail;
-	unsigned int rx_csum;
-};
-
 /* Routing stuff */
 struct qeth_routing_info {
 	enum qeth_routing_types type;
@@ -470,10 +443,54 @@ enum qeth_out_q_states {
        QETH_OUT_Q_LOCKED_FLUSH,
 };
 
+#define QETH_CARD_STAT_ADD(_c, _stat, _val)	((_c)->stats._stat += (_val))
+#define QETH_CARD_STAT_INC(_c, _stat)		QETH_CARD_STAT_ADD(_c, _stat, 1)
+
+#define QETH_TXQ_STAT_ADD(_q, _stat, _val)	((_q)->stats._stat += (_val))
+#define QETH_TXQ_STAT_INC(_q, _stat)		QETH_TXQ_STAT_ADD(_q, _stat, 1)
+
+struct qeth_card_stats {
+	u64 rx_bufs;
+	u64 rx_skb_csum;
+	u64 rx_sg_skbs;
+	u64 rx_sg_frags;
+	u64 rx_sg_alloc_page;
+
+	/* rtnl_link_stats64 */
+	u64 rx_packets;
+	u64 rx_bytes;
+	u64 rx_errors;
+	u64 rx_dropped;
+	u64 rx_multicast;
+	u64 tx_errors;
+};
+
+struct qeth_out_q_stats {
+	u64 bufs;
+	u64 bufs_pack;
+	u64 buf_elements;
+	u64 skbs_pack;
+	u64 skbs_sg;
+	u64 skbs_csum;
+	u64 skbs_tso;
+	u64 skbs_linearized;
+	u64 skbs_linearized_fail;
+	u64 tso_bytes;
+	u64 packing_mode_switch;
+
+	/* rtnl_link_stats64 */
+	u64 tx_packets;
+	u64 tx_bytes;
+	u64 tx_errors;
+	u64 tx_dropped;
+	u64 tx_carrier_errors;
+};
+
 struct qeth_qdio_out_q {
 	struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q];
 	struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q];
 	struct qdio_outbuf_state *bufstates; /* convenience pointer */
+	struct qeth_out_q_stats stats;
 	int queue_no;
 	struct qeth_card *card;
 	atomic_t state;
@@ -677,7 +694,6 @@ struct qeth_card_options {
 	struct qeth_vnicc_info vnicc; /* VNICC options */
 	int fake_broadcast;
 	enum qeth_discipline_id layer;
-	int performance_stats;
 	int rx_sg_cb;
 	enum qeth_ipa_isolation_modes isolation;
 	enum qeth_ipa_isolation_modes prev_isolation;
@@ -753,8 +769,7 @@ struct qeth_card {
 	struct qeth_channel data;
 
 	struct net_device *dev;
-	struct net_device_stats stats;
-
+	struct qeth_card_stats stats;
 	struct qeth_card_info info;
 	struct qeth_token token;
 	struct qeth_seqno seqno;
@@ -777,7 +792,6 @@ struct qeth_card {
 	struct list_head cmd_waiter_list;
 	/* QDIO buffer handling */
 	struct qeth_qdio_info qdio;
-	struct qeth_perf_stats perf_stats;
 	int read_or_write_problem;
 	struct qeth_osn_info osn_info;
 	struct qeth_discipline *discipline;
@@ -858,8 +872,7 @@ static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
 	if ((card->dev->features & NETIF_F_RXCSUM) &&
 	    (flags & QETH_HDR_EXT_CSUM_TRANSP_REQ)) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		if (card->options.performance_stats)
-			card->perf_stats.rx_csum++;
+		QETH_CARD_STAT_INC(card, rx_skb_csum);
 	} else {
 		skb->ip_summed = CHECKSUM_NONE;
 	}
@@ -966,7 +979,6 @@ void qeth_clear_working_pool_list(struct qeth_card *);
 void qeth_clear_cmd_buffers(struct qeth_channel *);
 void qeth_clear_qdio_buffers(struct qeth_card *);
 void qeth_setadp_promisc_mode(struct qeth_card *);
-struct net_device_stats *qeth_get_stats(struct net_device *);
 int qeth_setadpparms_change_macaddr(struct qeth_card *);
 void qeth_tx_timeout(struct net_device *);
 void qeth_prepare_control_data(struct qeth_card *, int,
@@ -1002,14 +1014,16 @@ netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
 netdev_features_t qeth_features_check(struct sk_buff *skb,
 				      struct net_device *dev,
 				      netdev_features_t features);
+void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats);
 int qeth_open(struct net_device *dev);
 int qeth_stop(struct net_device *dev);
 
 int qeth_vm_request_mac(struct qeth_card *card);
 int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
 	      struct qeth_qdio_out_q *queue, int ipv, int cast_type,
-	      void (*fill_header)(struct qeth_card *card, struct qeth_hdr *hdr,
-				  struct sk_buff *skb, int ipv, int cast_type,
+	      void (*fill_header)(struct qeth_qdio_out_q *queue,
+				  struct qeth_hdr *hdr, struct sk_buff *skb,
+				  int ipv, int cast_type,
 				  unsigned int data_len));
 
 /* exports for OSN */
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 96e4876a4daa..6d93bb48b1d9 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2703,8 +2703,7 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
 			} else {
 				free_page((unsigned long)entry->elements[i]);
 				entry->elements[i] = page_address(page);
-				if (card->options.performance_stats)
-					card->perf_stats.sg_alloc_page_rx++;
+				QETH_CARD_STAT_INC(card, rx_sg_alloc_page);
 			}
 		}
 	}
@@ -3180,7 +3179,7 @@ static int qeth_check_qdio_errors(struct qeth_card *card,
 			       buf->element[14].sflags);
 		QETH_CARD_TEXT_(card, 2, " qerr=%X", qdio_error);
 		if ((buf->element[15].sflags) == 0x12) {
-			card->stats.rx_dropped++;
+			QETH_CARD_STAT_INC(card, rx_dropped);
 			return 0;
 		} else
 			return 1;
@@ -3322,8 +3321,7 @@ static void qeth_switch_to_packing_if_needed(struct qeth_qdio_out_q *queue)
 		    >= QETH_HIGH_WATERMARK_PACK){
 			/* switch non-PACKING -> PACKING */
 			QETH_CARD_TEXT(queue->card, 6, "np->pack");
-			if (queue->card->options.performance_stats)
-				queue->card->perf_stats.sc_dp_p++;
+			QETH_TXQ_STAT_INC(queue, packing_mode_switch);
 			queue->do_pack = 1;
 		}
 	}
@@ -3342,8 +3340,7 @@ static int qeth_switch_to_nonpacking_if_needed(struct qeth_qdio_out_q *queue)
 		    <= QETH_LOW_WATERMARK_PACK) {
 			/* switch PACKING -> non-PACKING */
 			QETH_CARD_TEXT(queue->card, 6, "pack->np");
-			if (queue->card->options.performance_stats)
-				queue->card->perf_stats.sc_p_dp++;
+			QETH_TXQ_STAT_INC(queue, packing_mode_switch);
 			queue->do_pack = 0;
 			return qeth_prep_flush_pack_buffer(queue);
 		}
@@ -3397,6 +3394,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 		}
 	}
 
+	QETH_TXQ_STAT_ADD(queue, bufs, count);
 	netif_trans_update(queue->card->dev);
 	qdio_flags = QDIO_FLAG_SYNC_OUTPUT;
 	if (atomic_read(&queue->set_pci_flags_count))
@@ -3405,7 +3403,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 	rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags,
 		     queue->queue_no, index, count);
 	if (rc) {
-		queue->card->stats.tx_errors += count;
+		QETH_TXQ_STAT_ADD(queue, tx_errors, count);
 		/* ignore temporary SIGA errors without busy condition */
 		if (rc == -ENOBUFS)
 			return;
@@ -3420,8 +3418,6 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 		qeth_schedule_recovery(queue->card);
 		return;
 	}
-	if (queue->card->options.performance_stats)
-		queue->card->perf_stats.bufs_sent += count;
 }
 
 static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
@@ -3452,10 +3448,8 @@ static void qeth_check_outbound_queue(struct qeth_qdio_out_q *queue)
 			if (!flush_cnt &&
 			    !atomic_read(&queue->set_pci_flags_count))
 				flush_cnt += qeth_prep_flush_pack_buffer(queue);
-			if (queue->card->options.performance_stats &&
-			    q_was_packing)
-				queue->card->perf_stats.bufs_sent_pack +=
-					flush_cnt;
+			if (q_was_packing)
+				QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_cnt);
 			if (flush_cnt)
 				qeth_flush_buffers(queue, index, flush_cnt);
 			atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
@@ -3748,11 +3742,12 @@ EXPORT_SYMBOL_GPL(qeth_count_elements);
  * The number of needed buffer elements is returned in @elements.
  * Error to create the hdr is indicated by returning with < 0.
  */
-static int qeth_add_hw_header(struct qeth_card *card, struct sk_buff *skb,
-			      struct qeth_hdr **hdr, unsigned int hdr_len,
-			      unsigned int proto_len, unsigned int *elements)
+static int qeth_add_hw_header(struct qeth_qdio_out_q *queue,
+			      struct sk_buff *skb, struct qeth_hdr **hdr,
+			      unsigned int hdr_len, unsigned int proto_len,
+			      unsigned int *elements)
 {
-	const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(card);
+	const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(queue->card);
 	const unsigned int contiguous = proto_len ? proto_len : 1;
 	unsigned int __elements;
 	addr_t start, end;
@@ -3791,15 +3786,12 @@ static int qeth_add_hw_header(struct qeth_card *card, struct sk_buff *skb,
 		}
 
 		rc = skb_linearize(skb);
-		if (card->options.performance_stats) {
-			if (rc)
-				card->perf_stats.tx_linfail++;
-			else
-				card->perf_stats.tx_lin++;
-		}
-		if (rc)
+		if (rc) {
+			QETH_TXQ_STAT_INC(queue, skbs_linearized_fail);
 			return rc;
+		}
 
+		QETH_TXQ_STAT_INC(queue, skbs_linearized);
 		/* Linearization changed the layout, re-evaluate: */
 		goto check_layout;
 	}
@@ -3923,9 +3915,8 @@ static int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
 		QETH_CARD_TEXT(queue->card, 6, "fillbfnp");
 	} else {
 		QETH_CARD_TEXT(queue->card, 6, "fillbfpa");
-		if (queue->card->options.performance_stats)
-			queue->card->perf_stats.skbs_sent_pack++;
 
+		QETH_TXQ_STAT_INC(queue, skbs_pack);
 		/* If the buffer still has free elements, keep using it. */
 		if (buf->next_element_to_fill <
 		    QETH_MAX_BUFFER_ELEMENTS(queue->card))
@@ -4039,8 +4030,8 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
 	}
 out:
 	/* at this point the queue is UNLOCKED again */
-	if (queue->card->options.performance_stats && do_pack)
-		queue->card->perf_stats.bufs_sent_pack += flush_count;
+	if (do_pack)
+		QETH_TXQ_STAT_ADD(queue, bufs_pack, flush_count);
 
 	return rc;
 }
@@ -4064,8 +4055,9 @@ static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr,
 
 int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
 	      struct qeth_qdio_out_q *queue, int ipv, int cast_type,
-	      void (*fill_header)(struct qeth_card *card, struct qeth_hdr *hdr,
-				  struct sk_buff *skb, int ipv, int cast_type,
+	      void (*fill_header)(struct qeth_qdio_out_q *queue,
+				  struct qeth_hdr *hdr, struct sk_buff *skb,
+				  int ipv, int cast_type,
 				  unsigned int data_len))
 {
 	unsigned int proto_len, hw_hdr_len;
@@ -4090,7 +4082,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
 	if (rc)
 		return rc;
 
-	push_len = qeth_add_hw_header(card, skb, &hdr, hw_hdr_len, proto_len,
+	push_len = qeth_add_hw_header(queue, skb, &hdr, hw_hdr_len, proto_len,
 				      &elements);
 	if (push_len < 0)
 		return push_len;
@@ -4100,7 +4092,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
 		data_offset = push_len + proto_len;
 	}
 	memset(hdr, 0, hw_hdr_len);
-	fill_header(card, hdr, skb, ipv, cast_type, frame_len);
+	fill_header(queue, hdr, skb, ipv, cast_type, frame_len);
 	if (is_tso)
 		qeth_fill_tso_ext((struct qeth_hdr_tso *) hdr,
 				  frame_len - proto_len, skb, proto_len);
@@ -4117,14 +4109,12 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
 	}
 
 	if (!rc) {
-		if (card->options.performance_stats) {
-			card->perf_stats.buf_elements_sent += elements;
-			if (is_sg)
-				card->perf_stats.sg_skbs_sent++;
-			if (is_tso) {
-				card->perf_stats.large_send_bytes += frame_len;
-				card->perf_stats.large_send_cnt++;
-			}
+		QETH_TXQ_STAT_ADD(queue, buf_elements, elements);
+		if (is_sg)
+			QETH_TXQ_STAT_INC(queue, skbs_sg);
+		if (is_tso) {
+			QETH_TXQ_STAT_INC(queue, skbs_tso);
+			QETH_TXQ_STAT_ADD(queue, tso_bytes, frame_len);
 		}
 	} else {
 		if (!push_len)
@@ -4183,18 +4173,6 @@ void qeth_setadp_promisc_mode(struct qeth_card *card)
 }
 EXPORT_SYMBOL_GPL(qeth_setadp_promisc_mode);
 
-struct net_device_stats *qeth_get_stats(struct net_device *dev)
-{
-	struct qeth_card *card;
-
-	card = dev->ml_priv;
-
-	QETH_CARD_TEXT(card, 5, "getstat");
-
-	return &card->stats;
-}
-EXPORT_SYMBOL_GPL(qeth_get_stats);
-
 static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card,
 		struct qeth_reply *reply, unsigned long data)
 {
@@ -4388,7 +4366,7 @@ void qeth_tx_timeout(struct net_device *dev)
 
 	card = dev->ml_priv;
 	QETH_CARD_TEXT(card, 4, "txtimeo");
-	card->stats.tx_errors++;
+	QETH_CARD_STAT_INC(card, tx_errors);
 	qeth_schedule_recovery(card);
 }
 EXPORT_SYMBOL_GPL(qeth_tx_timeout);
@@ -5246,7 +5224,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
 				QETH_CARD_TEXT(card, 4, "unexeob");
 				QETH_CARD_HEX(card, 2, buffer, sizeof(void *));
 				dev_kfree_skb_any(skb);
-				card->stats.rx_errors++;
+				QETH_CARD_STAT_INC(card, rx_errors);
 				return NULL;
 			}
 			element++;
@@ -5258,16 +5236,17 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
 	}
 	*__element = element;
 	*__offset = offset;
-	if (use_rx_sg && card->options.performance_stats) {
-		card->perf_stats.sg_skbs_rx++;
-		card->perf_stats.sg_frags_rx += skb_shinfo(skb)->nr_frags;
+	if (use_rx_sg) {
+		QETH_CARD_STAT_INC(card, rx_sg_skbs);
+		QETH_CARD_STAT_ADD(card, rx_sg_frags,
+				   skb_shinfo(skb)->nr_frags);
 	}
 	return skb;
 no_mem:
 	if (net_ratelimit()) {
 		QETH_CARD_TEXT(card, 2, "noskbmem");
 	}
-	card->stats.rx_dropped++;
+	QETH_CARD_STAT_INC(card, rx_dropped);
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(qeth_core_get_next_skb);
@@ -5308,8 +5287,7 @@ int qeth_poll(struct napi_struct *napi, int budget)
 				done = 1;
 
 			if (done) {
-				if (card->options.performance_stats)
-					card->perf_stats.bufs_rec++;
+				QETH_CARD_STAT_INC(card, rx_bufs);
 				qeth_put_buffer_pool_entry(card,
 					buffer->pool_entry);
 				qeth_queue_input_buffer(card, card->rx.b_index);
@@ -6223,6 +6201,33 @@ netdev_features_t qeth_features_check(struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(qeth_features_check);
 
+void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	struct qeth_card *card = dev->ml_priv;
+	struct qeth_qdio_out_q *queue;
+	unsigned int i;
+
+	QETH_CARD_TEXT(card, 5, "getstat");
+
+	stats->rx_packets = card->stats.rx_packets;
+	stats->rx_bytes = card->stats.rx_bytes;
+	stats->rx_errors = card->stats.rx_errors;
+	stats->rx_dropped = card->stats.rx_dropped;
+	stats->multicast = card->stats.rx_multicast;
+	stats->tx_errors = card->stats.tx_errors;
+
+	for (i = 0; i < card->qdio.no_out_queues; i++) {
+		queue = card->qdio.out_qs[i];
+
+		stats->tx_packets += queue->stats.tx_packets;
+		stats->tx_bytes += queue->stats.tx_bytes;
+		stats->tx_errors += queue->stats.tx_errors;
+		stats->tx_dropped += queue->stats.tx_dropped;
+		stats->tx_carrier_errors += queue->stats.tx_carrier_errors;
+	}
+}
+EXPORT_SYMBOL_GPL(qeth_get_stats64);
+
 int qeth_open(struct net_device *dev)
 {
 	struct qeth_card *card = dev->ml_priv;
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 485a335669ba..8b223cc2c19b 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -336,35 +336,36 @@ static ssize_t qeth_dev_performance_stats_show(struct device *dev,
 	if (!card)
 		return -EINVAL;
 
-	return sprintf(buf, "%i\n", card->options.performance_stats ? 1:0);
+	return sprintf(buf, "1\n");
 }
 
 static ssize_t qeth_dev_performance_stats_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct qeth_card *card = dev_get_drvdata(dev);
-	char *tmp;
-	int i, rc = 0;
+	struct qeth_qdio_out_q *queue;
+	unsigned int i;
+	bool reset;
+	int rc;
 
 	if (!card)
 		return -EINVAL;
 
-	mutex_lock(&card->conf_mutex);
-	i = simple_strtoul(buf, &tmp, 16);
-	if ((i == 0) || (i == 1)) {
-		if (i == card->options.performance_stats)
-			goto out;
-		card->options.performance_stats = i;
-		if (i == 0)
-			memset(&card->perf_stats, 0,
-				sizeof(struct qeth_perf_stats));
-		card->perf_stats.initial_rx_packets = card->stats.rx_packets;
-		card->perf_stats.initial_tx_packets = card->stats.tx_packets;
-	} else
-		rc = -EINVAL;
-out:
-	mutex_unlock(&card->conf_mutex);
-	return rc ? rc : count;
+	rc = kstrtobool(buf, &reset);
+	if (rc)
+		return rc;
+
+	if (reset) {
+		memset(&card->stats, 0, sizeof(card->stats));
+		for (i = 0; i < card->qdio.no_out_queues; i++) {
+			queue = card->qdio.out_qs[i];
+			if (!queue)
+				break;
+			memset(&queue->stats, 0, sizeof(queue->stats));
+		}
+	}
+
+	return count;
 }
 
 static DEVICE_ATTR(performance_stats, 0644, qeth_dev_performance_stats_show,
diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c
index a275f31150b2..9ce5ef5fcf7a 100644
--- a/drivers/s390/net/qeth_ethtool.c
+++ b/drivers/s390/net/qeth_ethtool.c
@@ -9,43 +9,82 @@
 #include <linux/ethtool.h>
 #include "qeth_core.h"
 
-static struct {
-	const char str[ETH_GSTRING_LEN];
-} qeth_ethtool_stats_keys[] = {
-/*  0 */{"rx skbs"},
-	{"rx buffers"},
-	{"tx skbs"},
-	{"tx buffers"},
-	{"tx skbs no packing"},
-	{"tx buffers no packing"},
-	{"tx skbs packing"},
-	{"tx buffers packing"},
-	{"tx sg skbs"},
-	{"tx buffer elements"},
-/* 10 */{"rx sg skbs"},
-	{"rx sg frags"},
-	{"rx sg page allocs"},
-	{"tx large kbytes"},
-	{"tx large count"},
-	{"tx pk state ch n->p"},
-	{"tx pk state ch p->n"},
-	{"tx pk watermark low"},
-	{"tx pk watermark high"},
-	{"queue 0 buffer usage"},
-/* 20 */{"queue 1 buffer usage"},
-	{"queue 2 buffer usage"},
-	{"queue 3 buffer usage"},
-	{"tx csum"},
-	{"tx lin"},
-	{"tx linfail"},
-	{"rx csum"}
+
+#define QETH_TXQ_STAT(_name, _stat) { \
+	.name = _name, \
+	.offset = offsetof(struct qeth_out_q_stats, _stat) \
+}
+
+#define QETH_CARD_STAT(_name, _stat) { \
+	.name = _name, \
+	.offset = offsetof(struct qeth_card_stats, _stat) \
+}
+
+struct qeth_stats {
+	char name[ETH_GSTRING_LEN];
+	unsigned int offset;
+};
+
+static const struct qeth_stats txq_stats[] = {
+	QETH_TXQ_STAT("IO buffers", bufs),
+	QETH_TXQ_STAT("IO buffer elements", buf_elements),
+	QETH_TXQ_STAT("packed IO buffers", bufs_pack),
+	QETH_TXQ_STAT("skbs", tx_packets),
+	QETH_TXQ_STAT("packed skbs", skbs_pack),
+	QETH_TXQ_STAT("SG skbs", skbs_sg),
+	QETH_TXQ_STAT("HW csum skbs", skbs_csum),
+	QETH_TXQ_STAT("TSO skbs", skbs_tso),
+	QETH_TXQ_STAT("linearized skbs", skbs_linearized),
+	QETH_TXQ_STAT("linearized+error skbs", skbs_linearized_fail),
+	QETH_TXQ_STAT("TSO bytes", tso_bytes),
+	QETH_TXQ_STAT("Packing mode switches", packing_mode_switch),
+};
+
+static const struct qeth_stats card_stats[] = {
+	QETH_CARD_STAT("rx0 IO buffers", rx_bufs),
+	QETH_CARD_STAT("rx0 HW csum skbs", rx_skb_csum),
+	QETH_CARD_STAT("rx0 SG skbs", rx_sg_skbs),
+	QETH_CARD_STAT("rx0 SG page frags", rx_sg_frags),
+	QETH_CARD_STAT("rx0 SG page allocs", rx_sg_alloc_page),
 };
 
+#define TXQ_STATS_LEN	ARRAY_SIZE(txq_stats)
+#define CARD_STATS_LEN	ARRAY_SIZE(card_stats)
+
+static void qeth_add_stat_data(u64 **dst, void *src,
+			       const struct qeth_stats stats[],
+			       unsigned int size)
+{
+	unsigned int i;
+	char *stat;
+
+	for (i = 0; i < size; i++) {
+		stat = (char *)src + stats[i].offset;
+		**dst = *(u64 *)stat;
+		(*dst)++;
+	}
+}
+
+static void qeth_add_stat_strings(u8 **data, const char *prefix,
+				  const struct qeth_stats stats[],
+				  unsigned int size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++) {
+		snprintf(*data, ETH_GSTRING_LEN, "%s%s", prefix, stats[i].name);
+		*data += ETH_GSTRING_LEN;
+	}
+}
+
 static int qeth_get_sset_count(struct net_device *dev, int stringset)
 {
+	struct qeth_card *card = dev->ml_priv;
+
 	switch (stringset) {
 	case ETH_SS_STATS:
-		return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN);
+		return CARD_STATS_LEN +
+		       card->qdio.no_out_queues * TXQ_STATS_LEN;
 	default:
 		return -EINVAL;
 	}
@@ -55,48 +94,29 @@ static void qeth_get_ethtool_stats(struct net_device *dev,
 				   struct ethtool_stats *stats, u64 *data)
 {
 	struct qeth_card *card = dev->ml_priv;
+	unsigned int i;
 
-	data[0] = card->stats.rx_packets -
-				card->perf_stats.initial_rx_packets;
-	data[1] = card->perf_stats.bufs_rec;
-	data[2] = card->stats.tx_packets -
-				card->perf_stats.initial_tx_packets;
-	data[3] = card->perf_stats.bufs_sent;
-	data[4] = card->stats.tx_packets - card->perf_stats.initial_tx_packets
-			- card->perf_stats.skbs_sent_pack;
-	data[5] = card->perf_stats.bufs_sent - card->perf_stats.bufs_sent_pack;
-	data[6] = card->perf_stats.skbs_sent_pack;
-	data[7] = card->perf_stats.bufs_sent_pack;
-	data[8] = card->perf_stats.sg_skbs_sent;
-	data[9] = card->perf_stats.buf_elements_sent;
-	data[10] = card->perf_stats.sg_skbs_rx;
-	data[11] = card->perf_stats.sg_frags_rx;
-	data[12] = card->perf_stats.sg_alloc_page_rx;
-	data[13] = (card->perf_stats.large_send_bytes >> 10);
-	data[14] = card->perf_stats.large_send_cnt;
-	data[15] = card->perf_stats.sc_dp_p;
-	data[16] = card->perf_stats.sc_p_dp;
-	data[17] = QETH_LOW_WATERMARK_PACK;
-	data[18] = QETH_HIGH_WATERMARK_PACK;
-	data[19] = atomic_read(&card->qdio.out_qs[0]->used_buffers);
-	data[20] = (card->qdio.no_out_queues > 1) ?
-			atomic_read(&card->qdio.out_qs[1]->used_buffers) : 0;
-	data[21] = (card->qdio.no_out_queues > 2) ?
-			atomic_read(&card->qdio.out_qs[2]->used_buffers) : 0;
-	data[22] = (card->qdio.no_out_queues > 3) ?
-			atomic_read(&card->qdio.out_qs[3]->used_buffers) : 0;
-	data[23] = card->perf_stats.tx_csum;
-	data[24] = card->perf_stats.tx_lin;
-	data[25] = card->perf_stats.tx_linfail;
-	data[26] = card->perf_stats.rx_csum;
+	qeth_add_stat_data(&data, &card->stats, card_stats, CARD_STATS_LEN);
+	for (i = 0; i < card->qdio.no_out_queues; i++)
+		qeth_add_stat_data(&data, &card->qdio.out_qs[i]->stats,
+				   txq_stats, TXQ_STATS_LEN);
 }
 
 static void qeth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
+	struct qeth_card *card = dev->ml_priv;
+	char prefix[ETH_GSTRING_LEN] = "";
+	unsigned int i;
+
 	switch (stringset) {
 	case ETH_SS_STATS:
-		memcpy(data, &qeth_ethtool_stats_keys,
-			sizeof(qeth_ethtool_stats_keys));
+		qeth_add_stat_strings(&data, prefix, card_stats,
+				      CARD_STATS_LEN);
+		for (i = 0; i < card->qdio.no_out_queues; i++) {
+			snprintf(prefix, ETH_GSTRING_LEN, "tx%u ", i);
+			qeth_add_stat_strings(&data, prefix, txq_stats,
+					      TXQ_STATS_LEN);
+		}
 		break;
 	default:
 		WARN_ON(1);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index dbcba77fd94c..b1280a155953 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -174,9 +174,9 @@ static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 	return RTN_UNICAST;
 }
 
-static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
-				struct sk_buff *skb, int ipv, int cast_type,
-				unsigned int data_len)
+static void qeth_l2_fill_header(struct qeth_qdio_out_q *queue,
+				struct qeth_hdr *hdr, struct sk_buff *skb,
+				int ipv, int cast_type, unsigned int data_len)
 {
 	struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
 
@@ -188,8 +188,7 @@ static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 		hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2;
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv);
-			if (card->options.performance_stats)
-				card->perf_stats.tx_csum++;
+			QETH_TXQ_STAT_INC(queue, skbs_csum);
 		}
 	}
 
@@ -369,8 +368,8 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
 		}
 		work_done++;
 		budget--;
-		card->stats.rx_packets++;
-		card->stats.rx_bytes += len;
+		QETH_CARD_STAT_INC(card, rx_packets);
+		QETH_CARD_STAT_ADD(card, rx_bytes, len);
 	}
 	return work_done;
 }
@@ -626,12 +625,13 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
 	int tx_bytes = skb->len;
 	int rc;
 
+	queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
+
 	if (card->state != CARD_STATE_UP) {
-		card->stats.tx_carrier_errors++;
+		QETH_TXQ_STAT_INC(queue, tx_carrier_errors);
 		goto tx_drop;
 	}
 
-	queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
 	netif_stop_queue(dev);
 
 	if (IS_OSN(card))
@@ -641,8 +641,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
 			       qeth_l2_fill_header);
 
 	if (!rc) {
-		card->stats.tx_packets++;
-		card->stats.tx_bytes += tx_bytes;
+		QETH_TXQ_STAT_INC(queue, tx_packets);
+		QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes);
 		netif_wake_queue(dev);
 		return NETDEV_TX_OK;
 	} else if (rc == -EBUSY) {
@@ -650,8 +650,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
 	} /* else fall through */
 
 tx_drop:
-	card->stats.tx_dropped++;
-	card->stats.tx_errors++;
+	QETH_TXQ_STAT_INC(queue, tx_dropped);
+	QETH_TXQ_STAT_INC(queue, tx_errors);
 	dev_kfree_skb_any(skb);
 	netif_wake_queue(dev);
 	return NETDEV_TX_OK;
@@ -699,7 +699,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 static const struct net_device_ops qeth_l2_netdev_ops = {
 	.ndo_open		= qeth_open,
 	.ndo_stop		= qeth_stop,
-	.ndo_get_stats		= qeth_get_stats,
+	.ndo_get_stats64	= qeth_get_stats64,
 	.ndo_start_xmit		= qeth_l2_hard_start_xmit,
 	.ndo_features_check	= qeth_features_check,
 	.ndo_validate_addr	= qeth_l2_validate_addr,
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 40c7d53f5512..07c3149e228c 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1315,12 +1315,11 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
 				ip_eth_mc_map(ip_hdr(skb)->daddr, tg_addr);
 			else
 				ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr);
-
-			card->stats.multicast++;
+			QETH_CARD_STAT_INC(card, rx_multicast);
 			break;
 		case QETH_CAST_BROADCAST:
 			ether_addr_copy(tg_addr, card->dev->broadcast);
-			card->stats.multicast++;
+			QETH_CARD_STAT_INC(card, rx_multicast);
 			break;
 		default:
 			if (card->options.sniffer)
@@ -1401,8 +1400,8 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
 		}
 		work_done++;
 		budget--;
-		card->stats.rx_packets++;
-		card->stats.rx_bytes += len;
+		QETH_CARD_STAT_INC(card, rx_packets);
+		QETH_CARD_STAT_ADD(card, rx_bytes, len);
 	}
 	return work_done;
 }
@@ -1945,12 +1944,13 @@ static u8 qeth_l3_cast_type_to_flag(int cast_type)
 	return QETH_CAST_UNICAST;
 }
 
-static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
-				struct sk_buff *skb, int ipv, int cast_type,
-				unsigned int data_len)
+static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue,
+				struct qeth_hdr *hdr, struct sk_buff *skb,
+				int ipv, int cast_type, unsigned int data_len)
 {
 	struct qeth_hdr_layer3 *l3_hdr = &hdr->hdr.l3;
 	struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
+	struct qeth_card *card = queue->card;
 
 	hdr->hdr.l3.length = data_len;
 
@@ -1972,8 +1972,7 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 			/* some HW requires combined L3+L4 csum offload: */
 			if (ipv == 4)
 				hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_CSUM_HDR_REQ;
-			if (card->options.performance_stats)
-				card->perf_stats.tx_csum++;
+			QETH_TXQ_STAT_INC(queue, skbs_csum);
 		}
 	}
 
@@ -2074,6 +2073,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
 	int tx_bytes = skb->len;
 	int rc;
 
+	queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
+
 	if (IS_IQD(card)) {
 		if (card->options.sniffer)
 			goto tx_drop;
@@ -2084,14 +2085,13 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
 	}
 
 	if (card->state != CARD_STATE_UP) {
-		card->stats.tx_carrier_errors++;
+		QETH_TXQ_STAT_INC(queue, tx_carrier_errors);
 		goto tx_drop;
 	}
 
 	if (cast_type == RTN_BROADCAST && !card->info.broadcast_capable)
 		goto tx_drop;
 
-	queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
 	netif_stop_queue(dev);
 
 	if (ipv == 4 || IS_IQD(card))
@@ -2101,8 +2101,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
 			       qeth_l3_fill_header);
 
 	if (!rc) {
-		card->stats.tx_packets++;
-		card->stats.tx_bytes += tx_bytes;
+		QETH_TXQ_STAT_INC(queue, tx_packets);
+		QETH_TXQ_STAT_ADD(queue, tx_bytes, tx_bytes);
 		netif_wake_queue(dev);
 		return NETDEV_TX_OK;
 	} else if (rc == -EBUSY) {
@@ -2110,8 +2110,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
 	} /* else fall through */
 
 tx_drop:
-	card->stats.tx_dropped++;
-	card->stats.tx_errors++;
+	QETH_TXQ_STAT_INC(queue, tx_dropped);
+	QETH_TXQ_STAT_INC(queue, tx_errors);
 	dev_kfree_skb_any(skb);
 	netif_wake_queue(dev);
 	return NETDEV_TX_OK;
@@ -2153,7 +2153,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb,
 static const struct net_device_ops qeth_l3_netdev_ops = {
 	.ndo_open		= qeth_open,
 	.ndo_stop		= qeth_stop,
-	.ndo_get_stats		= qeth_get_stats,
+	.ndo_get_stats64	= qeth_get_stats64,
 	.ndo_start_xmit		= qeth_l3_hard_start_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= qeth_l3_set_rx_mode,
@@ -2168,7 +2168,7 @@ static const struct net_device_ops qeth_l3_netdev_ops = {
 static const struct net_device_ops qeth_l3_osa_netdev_ops = {
 	.ndo_open		= qeth_open,
 	.ndo_stop		= qeth_stop,
-	.ndo_get_stats		= qeth_get_stats,
+	.ndo_get_stats64	= qeth_get_stats64,
 	.ndo_start_xmit		= qeth_l3_hard_start_xmit,
 	.ndo_features_check	= qeth_l3_osa_features_check,
 	.ndo_validate_addr	= eth_validate_addr,
-- 
2.16.4


^ permalink raw reply related

* [PATCH net-next 7/7] s390/qeth: split out OSN netdev ops
From: Julian Wiedmann @ 2019-02-15 18:22 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20190215182231.90709-1-jwi@linux.ibm.com>

Rather than special-casing OSN in a number of places, just give this
device type its own netdev_ops structure.

When setting up the OSN net_device, also skip the handling of the
various HW offloads (eg TSO). The device shouldn't be advertising any of
them, and the OSN code paths in qeth don't have support for them.
In particular RX VLAN filtering is not supported, so don't hook up those
callbacks in the netdev_ops.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/qeth_core_main.c |  3 ---
 drivers/s390/net/qeth_core_mpc.h  |  1 +
 drivers/s390/net/qeth_l2_main.c   | 32 ++++++++++++++++++++------------
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 6d93bb48b1d9..4708df39f129 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5850,9 +5850,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!card)
 		return -ENODEV;
 
-	if (card->info.type == QETH_CARD_TYPE_OSN)
-		return -EPERM;
-
 	switch (cmd) {
 	case SIOC_QETH_ADP_SET_SNMP_CONTROL:
 		rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data);
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index 4b1690380ebe..f8c5d4a9be13 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -81,6 +81,7 @@ enum qeth_card_types {
 #define IS_OSD(card)	((card)->info.type == QETH_CARD_TYPE_OSD)
 #define IS_OSM(card)	((card)->info.type == QETH_CARD_TYPE_OSM)
 #define IS_OSN(card)	((card)->info.type == QETH_CARD_TYPE_OSN)
+#define IS_OSX(card)	((card)->info.type == QETH_CARD_TYPE_OSX)
 #define IS_VM_NIC(card)	((card)->info.guestlan)
 
 #define QETH_MPC_DIFINFO_LEN_INDICATES_LINK_TYPE 0x18
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index b1280a155953..2c9714215775 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -425,7 +425,7 @@ static int qeth_l2_validate_addr(struct net_device *dev)
 {
 	struct qeth_card *card = dev->ml_priv;
 
-	if (IS_OSN(card) || (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))
+	if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)
 		return eth_validate_addr(dev);
 
 	QETH_CARD_TEXT(card, 4, "nomacadr");
@@ -441,9 +441,7 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p)
 
 	QETH_CARD_TEXT(card, 3, "setmac");
 
-	if (card->info.type == QETH_CARD_TYPE_OSN ||
-	    card->info.type == QETH_CARD_TYPE_OSM ||
-	    card->info.type == QETH_CARD_TYPE_OSX) {
+	if (IS_OSM(card) || IS_OSX(card)) {
 		QETH_CARD_TEXT(card, 3, "setmcTYP");
 		return -EOPNOTSUPP;
 	}
@@ -537,9 +535,6 @@ static void qeth_l2_set_rx_mode(struct net_device *dev)
 	int i;
 	int rc;
 
-	if (card->info.type == QETH_CARD_TYPE_OSN)
-		return;
-
 	QETH_CARD_TEXT(card, 3, "setmulti");
 
 	spin_lock_bh(&card->mclock);
@@ -713,16 +708,28 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
 	.ndo_set_features	= qeth_set_features
 };
 
+static const struct net_device_ops qeth_osn_netdev_ops = {
+	.ndo_open		= qeth_open,
+	.ndo_stop		= qeth_stop,
+	.ndo_get_stats64	= qeth_get_stats64,
+	.ndo_start_xmit		= qeth_l2_hard_start_xmit,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_tx_timeout		= qeth_tx_timeout,
+};
+
 static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok)
 {
 	int rc;
 
-	card->dev->priv_flags |= IFF_UNICAST_FLT;
-	card->dev->netdev_ops = &qeth_l2_netdev_ops;
-	if (IS_OSN(card))
+	if (IS_OSN(card)) {
+		card->dev->netdev_ops = &qeth_osn_netdev_ops;
 		card->dev->flags |= IFF_NOARP;
-	else
-		card->dev->needed_headroom = sizeof(struct qeth_hdr);
+		goto add_napi;
+	}
+
+	card->dev->needed_headroom = sizeof(struct qeth_hdr);
+	card->dev->netdev_ops = &qeth_l2_netdev_ops;
+	card->dev->priv_flags |= IFF_UNICAST_FLT;
 
 	if (IS_OSM(card)) {
 		card->dev->features |= NETIF_F_VLAN_CHALLENGED;
@@ -764,6 +771,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok)
 				       PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1));
 	}
 
+add_napi:
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
 	rc = register_netdev(card->dev);
 	if (!rc && carrier_ok)
-- 
2.16.4


^ permalink raw reply related

* [PATCH net-next 4/7] s390/qeth: move ethtool code into its own file
From: Julian Wiedmann @ 2019-02-15 18:22 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20190215182231.90709-1-jwi@linux.ibm.com>

Most of this is self-contained code.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/Makefile         |   2 +-
 drivers/s390/net/qeth_core.h      |  12 +-
 drivers/s390/net/qeth_core_main.c | 317 +-----------------------------------
 drivers/s390/net/qeth_ethtool.c   | 333 ++++++++++++++++++++++++++++++++++++++
 drivers/s390/net/qeth_l2_main.c   |  23 +--
 drivers/s390/net/qeth_l3_main.c   |  10 --
 6 files changed, 346 insertions(+), 351 deletions(-)
 create mode 100644 drivers/s390/net/qeth_ethtool.c

diff --git a/drivers/s390/net/Makefile b/drivers/s390/net/Makefile
index f2d6bbe57a6f..bc55ec316adb 100644
--- a/drivers/s390/net/Makefile
+++ b/drivers/s390/net/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o
 obj-$(CONFIG_SMSGIUCV) += smsgiucv.o
 obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o
 obj-$(CONFIG_LCS) += lcs.o
-qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o
+qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o
 obj-$(CONFIG_QETH) += qeth.o
 qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o
 obj-$(CONFIG_QETH_L2) += qeth_l2.o
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 9928728649eb..5e08f112db13 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -18,7 +18,6 @@
 #include <linux/in6.h>
 #include <linux/bitops.h>
 #include <linux/seq_file.h>
-#include <linux/ethtool.h>
 #include <linux/hashtable.h>
 #include <linux/ip.h>
 #include <linux/refcount.h>
@@ -922,6 +921,8 @@ static inline struct qeth_qdio_out_q *qeth_get_tx_queue(struct qeth_card *card,
 
 extern struct qeth_discipline qeth_l2_discipline;
 extern struct qeth_discipline qeth_l3_discipline;
+extern const struct ethtool_ops qeth_ethtool_ops;
+extern const struct ethtool_ops qeth_osn_ethtool_ops;
 extern const struct attribute_group *qeth_generic_attr_groups[];
 extern const struct attribute_group *qeth_osn_attr_groups[];
 extern const struct attribute_group qeth_device_attr_group;
@@ -976,20 +977,15 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
 struct qeth_cmd_buffer *qeth_wait_for_buffer(struct qeth_channel *);
 int qeth_query_switch_attributes(struct qeth_card *card,
 				  struct qeth_switch_info *sw_info);
+int qeth_query_card_info(struct qeth_card *card,
+			 struct carrier_info *carrier_info);
 unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset);
 int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
 			struct sk_buff *skb, struct qeth_hdr *hdr,
 			unsigned int offset, unsigned int hd_len,
 			int elements_needed);
 int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-int qeth_core_get_sset_count(struct net_device *, int);
-void qeth_core_get_ethtool_stats(struct net_device *,
-				struct ethtool_stats *, u64 *);
-void qeth_core_get_strings(struct net_device *, u32, u8 *);
-void qeth_core_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...);
-int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
-					 struct ethtool_link_ksettings *cmd);
 int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback);
 int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index e3127e232fb2..96e4876a4daa 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -4698,8 +4698,8 @@ static int qeth_query_card_info_cb(struct qeth_card *card,
 	return 0;
 }
 
-static int qeth_query_card_info(struct qeth_card *card,
-				struct carrier_info *carrier_info)
+int qeth_query_card_info(struct qeth_card *card,
+			 struct carrier_info *carrier_info)
 {
 	struct qeth_cmd_buffer *iob;
 
@@ -5623,7 +5623,10 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card)
 	SET_NETDEV_DEV(dev, &card->gdev->dev);
 	netif_carrier_off(dev);
 
-	if (!IS_OSN(card)) {
+	if (IS_OSN(card)) {
+		dev->ethtool_ops = &qeth_osn_ethtool_ops;
+	} else {
+		dev->ethtool_ops = &qeth_ethtool_ops;
 		dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 		dev->hw_features |= NETIF_F_SG;
 		dev->vlan_features |= NETIF_F_SG;
@@ -5911,314 +5914,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 EXPORT_SYMBOL_GPL(qeth_do_ioctl);
 
-static struct {
-	const char str[ETH_GSTRING_LEN];
-} qeth_ethtool_stats_keys[] = {
-/*  0 */{"rx skbs"},
-	{"rx buffers"},
-	{"tx skbs"},
-	{"tx buffers"},
-	{"tx skbs no packing"},
-	{"tx buffers no packing"},
-	{"tx skbs packing"},
-	{"tx buffers packing"},
-	{"tx sg skbs"},
-	{"tx buffer elements"},
-/* 10 */{"rx sg skbs"},
-	{"rx sg frags"},
-	{"rx sg page allocs"},
-	{"tx large kbytes"},
-	{"tx large count"},
-	{"tx pk state ch n->p"},
-	{"tx pk state ch p->n"},
-	{"tx pk watermark low"},
-	{"tx pk watermark high"},
-	{"queue 0 buffer usage"},
-/* 20 */{"queue 1 buffer usage"},
-	{"queue 2 buffer usage"},
-	{"queue 3 buffer usage"},
-	{"tx csum"},
-	{"tx lin"},
-	{"tx linfail"},
-	{"rx csum"}
-};
-
-int qeth_core_get_sset_count(struct net_device *dev, int stringset)
-{
-	switch (stringset) {
-	case ETH_SS_STATS:
-		return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN);
-	default:
-		return -EINVAL;
-	}
-}
-EXPORT_SYMBOL_GPL(qeth_core_get_sset_count);
-
-void qeth_core_get_ethtool_stats(struct net_device *dev,
-		struct ethtool_stats *stats, u64 *data)
-{
-	struct qeth_card *card = dev->ml_priv;
-	data[0] = card->stats.rx_packets -
-				card->perf_stats.initial_rx_packets;
-	data[1] = card->perf_stats.bufs_rec;
-	data[2] = card->stats.tx_packets -
-				card->perf_stats.initial_tx_packets;
-	data[3] = card->perf_stats.bufs_sent;
-	data[4] = card->stats.tx_packets - card->perf_stats.initial_tx_packets
-			- card->perf_stats.skbs_sent_pack;
-	data[5] = card->perf_stats.bufs_sent - card->perf_stats.bufs_sent_pack;
-	data[6] = card->perf_stats.skbs_sent_pack;
-	data[7] = card->perf_stats.bufs_sent_pack;
-	data[8] = card->perf_stats.sg_skbs_sent;
-	data[9] = card->perf_stats.buf_elements_sent;
-	data[10] = card->perf_stats.sg_skbs_rx;
-	data[11] = card->perf_stats.sg_frags_rx;
-	data[12] = card->perf_stats.sg_alloc_page_rx;
-	data[13] = (card->perf_stats.large_send_bytes >> 10);
-	data[14] = card->perf_stats.large_send_cnt;
-	data[15] = card->perf_stats.sc_dp_p;
-	data[16] = card->perf_stats.sc_p_dp;
-	data[17] = QETH_LOW_WATERMARK_PACK;
-	data[18] = QETH_HIGH_WATERMARK_PACK;
-	data[19] = atomic_read(&card->qdio.out_qs[0]->used_buffers);
-	data[20] = (card->qdio.no_out_queues > 1) ?
-			atomic_read(&card->qdio.out_qs[1]->used_buffers) : 0;
-	data[21] = (card->qdio.no_out_queues > 2) ?
-			atomic_read(&card->qdio.out_qs[2]->used_buffers) : 0;
-	data[22] = (card->qdio.no_out_queues > 3) ?
-			atomic_read(&card->qdio.out_qs[3]->used_buffers) : 0;
-	data[23] = card->perf_stats.tx_csum;
-	data[24] = card->perf_stats.tx_lin;
-	data[25] = card->perf_stats.tx_linfail;
-	data[26] = card->perf_stats.rx_csum;
-}
-EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats);
-
-void qeth_core_get_strings(struct net_device *dev, u32 stringset, u8 *data)
-{
-	switch (stringset) {
-	case ETH_SS_STATS:
-		memcpy(data, &qeth_ethtool_stats_keys,
-			sizeof(qeth_ethtool_stats_keys));
-		break;
-	default:
-		WARN_ON(1);
-		break;
-	}
-}
-EXPORT_SYMBOL_GPL(qeth_core_get_strings);
-
-void qeth_core_get_drvinfo(struct net_device *dev,
-		struct ethtool_drvinfo *info)
-{
-	struct qeth_card *card = dev->ml_priv;
-
-	strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3",
-		sizeof(info->driver));
-	strlcpy(info->version, "1.0", sizeof(info->version));
-	strlcpy(info->fw_version, card->info.mcl_level,
-		sizeof(info->fw_version));
-	snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s",
-		 CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card));
-}
-EXPORT_SYMBOL_GPL(qeth_core_get_drvinfo);
-
-/* Helper function to fill 'advertising' and 'supported' which are the same. */
-/* Autoneg and full-duplex are supported and advertised unconditionally.     */
-/* Always advertise and support all speeds up to specified, and only one     */
-/* specified port type.							     */
-static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd,
-				int maxspeed, int porttype)
-{
-	ethtool_link_ksettings_zero_link_mode(cmd, supported);
-	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
-	ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising);
-
-	ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
-	ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
-
-	switch (porttype) {
-	case PORT_TP:
-		ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
-		break;
-	case PORT_FIBRE:
-		ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
-		break;
-	default:
-		ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
-		WARN_ON_ONCE(1);
-	}
-
-	/* partially does fall through, to also select lower speeds */
-	switch (maxspeed) {
-	case SPEED_25000:
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     25000baseSR_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     25000baseSR_Full);
-		break;
-	case SPEED_10000:
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     10000baseT_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     10000baseT_Full);
-	case SPEED_1000:
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     1000baseT_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     1000baseT_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     1000baseT_Half);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     1000baseT_Half);
-	case SPEED_100:
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     100baseT_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     100baseT_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     100baseT_Half);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     100baseT_Half);
-	case SPEED_10:
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     10baseT_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     10baseT_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     10baseT_Half);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     10baseT_Half);
-		/* end fallthrough */
-		break;
-	default:
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     10baseT_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     10baseT_Full);
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     10baseT_Half);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     10baseT_Half);
-		WARN_ON_ONCE(1);
-	}
-}
-
-int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
-		struct ethtool_link_ksettings *cmd)
-{
-	struct qeth_card *card = netdev->ml_priv;
-	enum qeth_link_types link_type;
-	struct carrier_info carrier_info;
-	int rc;
-
-	if ((card->info.type == QETH_CARD_TYPE_IQD) || (card->info.guestlan))
-		link_type = QETH_LINK_TYPE_10GBIT_ETH;
-	else
-		link_type = card->info.link_type;
-
-	cmd->base.duplex = DUPLEX_FULL;
-	cmd->base.autoneg = AUTONEG_ENABLE;
-	cmd->base.phy_address = 0;
-	cmd->base.mdio_support = 0;
-	cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
-	cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
-
-	switch (link_type) {
-	case QETH_LINK_TYPE_FAST_ETH:
-	case QETH_LINK_TYPE_LANE_ETH100:
-		cmd->base.speed = SPEED_100;
-		cmd->base.port = PORT_TP;
-		break;
-	case QETH_LINK_TYPE_GBIT_ETH:
-	case QETH_LINK_TYPE_LANE_ETH1000:
-		cmd->base.speed = SPEED_1000;
-		cmd->base.port = PORT_FIBRE;
-		break;
-	case QETH_LINK_TYPE_10GBIT_ETH:
-		cmd->base.speed = SPEED_10000;
-		cmd->base.port = PORT_FIBRE;
-		break;
-	case QETH_LINK_TYPE_25GBIT_ETH:
-		cmd->base.speed = SPEED_25000;
-		cmd->base.port = PORT_FIBRE;
-		break;
-	default:
-		cmd->base.speed = SPEED_10;
-		cmd->base.port = PORT_TP;
-	}
-	qeth_set_cmd_adv_sup(cmd, cmd->base.speed, cmd->base.port);
-
-	/* Check if we can obtain more accurate information.	 */
-	/* If QUERY_CARD_INFO command is not supported or fails, */
-	/* just return the heuristics that was filled above.	 */
-	rc = qeth_query_card_info(card, &carrier_info);
-	if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */
-		return 0;
-	if (rc) /* report error from the hardware operation */
-		return rc;
-	/* on success, fill in the information got from the hardware */
-
-	netdev_dbg(netdev,
-	"card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n",
-			carrier_info.card_type,
-			carrier_info.port_mode,
-			carrier_info.port_speed);
-
-	/* Update attributes for which we've obtained more authoritative */
-	/* information, leave the rest the way they where filled above.  */
-	switch (carrier_info.card_type) {
-	case CARD_INFO_TYPE_1G_COPPER_A:
-	case CARD_INFO_TYPE_1G_COPPER_B:
-		cmd->base.port = PORT_TP;
-		qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port);
-		break;
-	case CARD_INFO_TYPE_1G_FIBRE_A:
-	case CARD_INFO_TYPE_1G_FIBRE_B:
-		cmd->base.port = PORT_FIBRE;
-		qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port);
-		break;
-	case CARD_INFO_TYPE_10G_FIBRE_A:
-	case CARD_INFO_TYPE_10G_FIBRE_B:
-		cmd->base.port = PORT_FIBRE;
-		qeth_set_cmd_adv_sup(cmd, SPEED_10000, cmd->base.port);
-		break;
-	}
-
-	switch (carrier_info.port_mode) {
-	case CARD_INFO_PORTM_FULLDUPLEX:
-		cmd->base.duplex = DUPLEX_FULL;
-		break;
-	case CARD_INFO_PORTM_HALFDUPLEX:
-		cmd->base.duplex = DUPLEX_HALF;
-		break;
-	}
-
-	switch (carrier_info.port_speed) {
-	case CARD_INFO_PORTS_10M:
-		cmd->base.speed = SPEED_10;
-		break;
-	case CARD_INFO_PORTS_100M:
-		cmd->base.speed = SPEED_100;
-		break;
-	case CARD_INFO_PORTS_1G:
-		cmd->base.speed = SPEED_1000;
-		break;
-	case CARD_INFO_PORTS_10G:
-		cmd->base.speed = SPEED_10000;
-		break;
-	case CARD_INFO_PORTS_25G:
-		cmd->base.speed = SPEED_25000;
-		break;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(qeth_core_ethtool_get_link_ksettings);
-
 static int qeth_start_csum_cb(struct qeth_card *card, struct qeth_reply *reply,
 			      unsigned long data)
 {
diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c
new file mode 100644
index 000000000000..a275f31150b2
--- /dev/null
+++ b/drivers/s390/net/qeth_ethtool.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2018
+ */
+
+#define KMSG_COMPONENT "qeth"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/ethtool.h>
+#include "qeth_core.h"
+
+static struct {
+	const char str[ETH_GSTRING_LEN];
+} qeth_ethtool_stats_keys[] = {
+/*  0 */{"rx skbs"},
+	{"rx buffers"},
+	{"tx skbs"},
+	{"tx buffers"},
+	{"tx skbs no packing"},
+	{"tx buffers no packing"},
+	{"tx skbs packing"},
+	{"tx buffers packing"},
+	{"tx sg skbs"},
+	{"tx buffer elements"},
+/* 10 */{"rx sg skbs"},
+	{"rx sg frags"},
+	{"rx sg page allocs"},
+	{"tx large kbytes"},
+	{"tx large count"},
+	{"tx pk state ch n->p"},
+	{"tx pk state ch p->n"},
+	{"tx pk watermark low"},
+	{"tx pk watermark high"},
+	{"queue 0 buffer usage"},
+/* 20 */{"queue 1 buffer usage"},
+	{"queue 2 buffer usage"},
+	{"queue 3 buffer usage"},
+	{"tx csum"},
+	{"tx lin"},
+	{"tx linfail"},
+	{"rx csum"}
+};
+
+static int qeth_get_sset_count(struct net_device *dev, int stringset)
+{
+	switch (stringset) {
+	case ETH_SS_STATS:
+		return (sizeof(qeth_ethtool_stats_keys) / ETH_GSTRING_LEN);
+	default:
+		return -EINVAL;
+	}
+}
+
+static void qeth_get_ethtool_stats(struct net_device *dev,
+				   struct ethtool_stats *stats, u64 *data)
+{
+	struct qeth_card *card = dev->ml_priv;
+
+	data[0] = card->stats.rx_packets -
+				card->perf_stats.initial_rx_packets;
+	data[1] = card->perf_stats.bufs_rec;
+	data[2] = card->stats.tx_packets -
+				card->perf_stats.initial_tx_packets;
+	data[3] = card->perf_stats.bufs_sent;
+	data[4] = card->stats.tx_packets - card->perf_stats.initial_tx_packets
+			- card->perf_stats.skbs_sent_pack;
+	data[5] = card->perf_stats.bufs_sent - card->perf_stats.bufs_sent_pack;
+	data[6] = card->perf_stats.skbs_sent_pack;
+	data[7] = card->perf_stats.bufs_sent_pack;
+	data[8] = card->perf_stats.sg_skbs_sent;
+	data[9] = card->perf_stats.buf_elements_sent;
+	data[10] = card->perf_stats.sg_skbs_rx;
+	data[11] = card->perf_stats.sg_frags_rx;
+	data[12] = card->perf_stats.sg_alloc_page_rx;
+	data[13] = (card->perf_stats.large_send_bytes >> 10);
+	data[14] = card->perf_stats.large_send_cnt;
+	data[15] = card->perf_stats.sc_dp_p;
+	data[16] = card->perf_stats.sc_p_dp;
+	data[17] = QETH_LOW_WATERMARK_PACK;
+	data[18] = QETH_HIGH_WATERMARK_PACK;
+	data[19] = atomic_read(&card->qdio.out_qs[0]->used_buffers);
+	data[20] = (card->qdio.no_out_queues > 1) ?
+			atomic_read(&card->qdio.out_qs[1]->used_buffers) : 0;
+	data[21] = (card->qdio.no_out_queues > 2) ?
+			atomic_read(&card->qdio.out_qs[2]->used_buffers) : 0;
+	data[22] = (card->qdio.no_out_queues > 3) ?
+			atomic_read(&card->qdio.out_qs[3]->used_buffers) : 0;
+	data[23] = card->perf_stats.tx_csum;
+	data[24] = card->perf_stats.tx_lin;
+	data[25] = card->perf_stats.tx_linfail;
+	data[26] = card->perf_stats.rx_csum;
+}
+
+static void qeth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+	switch (stringset) {
+	case ETH_SS_STATS:
+		memcpy(data, &qeth_ethtool_stats_keys,
+			sizeof(qeth_ethtool_stats_keys));
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+
+static void qeth_get_drvinfo(struct net_device *dev,
+			     struct ethtool_drvinfo *info)
+{
+	struct qeth_card *card = dev->ml_priv;
+
+	strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3",
+		sizeof(info->driver));
+	strlcpy(info->version, "1.0", sizeof(info->version));
+	strlcpy(info->fw_version, card->info.mcl_level,
+		sizeof(info->fw_version));
+	snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s",
+		 CARD_RDEV_ID(card), CARD_WDEV_ID(card), CARD_DDEV_ID(card));
+}
+
+/* Helper function to fill 'advertising' and 'supported' which are the same. */
+/* Autoneg and full-duplex are supported and advertised unconditionally.     */
+/* Always advertise and support all speeds up to specified, and only one     */
+/* specified port type.							     */
+static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd,
+				int maxspeed, int porttype)
+{
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+	ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising);
+
+	ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+
+	switch (porttype) {
+	case PORT_TP:
+		ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+		break;
+	case PORT_FIBRE:
+		ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
+		break;
+	default:
+		ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+		WARN_ON_ONCE(1);
+	}
+
+	/* partially does fall through, to also select lower speeds */
+	switch (maxspeed) {
+	case SPEED_25000:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     25000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     25000baseSR_Full);
+		break;
+	case SPEED_10000:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     10000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     10000baseT_Full);
+		/* fall through */
+	case SPEED_1000:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     1000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     1000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     1000baseT_Half);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     1000baseT_Half);
+		/* fall through */
+	case SPEED_100:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     100baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     100baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     100baseT_Half);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     100baseT_Half);
+		/* fall through */
+	case SPEED_10:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     10baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     10baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     10baseT_Half);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     10baseT_Half);
+		break;
+	default:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     10baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     10baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     10baseT_Half);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     10baseT_Half);
+		WARN_ON_ONCE(1);
+	}
+}
+
+
+static int qeth_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *cmd)
+{
+	struct qeth_card *card = netdev->ml_priv;
+	enum qeth_link_types link_type;
+	struct carrier_info carrier_info;
+	int rc;
+
+	if (IS_IQD(card) || IS_VM_NIC(card))
+		link_type = QETH_LINK_TYPE_10GBIT_ETH;
+	else
+		link_type = card->info.link_type;
+
+	cmd->base.duplex = DUPLEX_FULL;
+	cmd->base.autoneg = AUTONEG_ENABLE;
+	cmd->base.phy_address = 0;
+	cmd->base.mdio_support = 0;
+	cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+	cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+
+	switch (link_type) {
+	case QETH_LINK_TYPE_FAST_ETH:
+	case QETH_LINK_TYPE_LANE_ETH100:
+		cmd->base.speed = SPEED_100;
+		cmd->base.port = PORT_TP;
+		break;
+	case QETH_LINK_TYPE_GBIT_ETH:
+	case QETH_LINK_TYPE_LANE_ETH1000:
+		cmd->base.speed = SPEED_1000;
+		cmd->base.port = PORT_FIBRE;
+		break;
+	case QETH_LINK_TYPE_10GBIT_ETH:
+		cmd->base.speed = SPEED_10000;
+		cmd->base.port = PORT_FIBRE;
+		break;
+	case QETH_LINK_TYPE_25GBIT_ETH:
+		cmd->base.speed = SPEED_25000;
+		cmd->base.port = PORT_FIBRE;
+		break;
+	default:
+		cmd->base.speed = SPEED_10;
+		cmd->base.port = PORT_TP;
+	}
+	qeth_set_cmd_adv_sup(cmd, cmd->base.speed, cmd->base.port);
+
+	/* Check if we can obtain more accurate information.	 */
+	/* If QUERY_CARD_INFO command is not supported or fails, */
+	/* just return the heuristics that was filled above.	 */
+	rc = qeth_query_card_info(card, &carrier_info);
+	if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */
+		return 0;
+	if (rc) /* report error from the hardware operation */
+		return rc;
+	/* on success, fill in the information got from the hardware */
+
+	netdev_dbg(netdev,
+	"card info: card_type=0x%02x, port_mode=0x%04x, port_speed=0x%08x\n",
+			carrier_info.card_type,
+			carrier_info.port_mode,
+			carrier_info.port_speed);
+
+	/* Update attributes for which we've obtained more authoritative */
+	/* information, leave the rest the way they where filled above.  */
+	switch (carrier_info.card_type) {
+	case CARD_INFO_TYPE_1G_COPPER_A:
+	case CARD_INFO_TYPE_1G_COPPER_B:
+		cmd->base.port = PORT_TP;
+		qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port);
+		break;
+	case CARD_INFO_TYPE_1G_FIBRE_A:
+	case CARD_INFO_TYPE_1G_FIBRE_B:
+		cmd->base.port = PORT_FIBRE;
+		qeth_set_cmd_adv_sup(cmd, SPEED_1000, cmd->base.port);
+		break;
+	case CARD_INFO_TYPE_10G_FIBRE_A:
+	case CARD_INFO_TYPE_10G_FIBRE_B:
+		cmd->base.port = PORT_FIBRE;
+		qeth_set_cmd_adv_sup(cmd, SPEED_10000, cmd->base.port);
+		break;
+	}
+
+	switch (carrier_info.port_mode) {
+	case CARD_INFO_PORTM_FULLDUPLEX:
+		cmd->base.duplex = DUPLEX_FULL;
+		break;
+	case CARD_INFO_PORTM_HALFDUPLEX:
+		cmd->base.duplex = DUPLEX_HALF;
+		break;
+	}
+
+	switch (carrier_info.port_speed) {
+	case CARD_INFO_PORTS_10M:
+		cmd->base.speed = SPEED_10;
+		break;
+	case CARD_INFO_PORTS_100M:
+		cmd->base.speed = SPEED_100;
+		break;
+	case CARD_INFO_PORTS_1G:
+		cmd->base.speed = SPEED_1000;
+		break;
+	case CARD_INFO_PORTS_10G:
+		cmd->base.speed = SPEED_10000;
+		break;
+	case CARD_INFO_PORTS_25G:
+		cmd->base.speed = SPEED_25000;
+		break;
+	}
+
+	return 0;
+}
+
+const struct ethtool_ops qeth_ethtool_ops = {
+	.get_link = ethtool_op_get_link,
+	.get_strings = qeth_get_strings,
+	.get_ethtool_stats = qeth_get_ethtool_stats,
+	.get_sset_count = qeth_get_sset_count,
+	.get_drvinfo = qeth_get_drvinfo,
+	.get_link_ksettings = qeth_get_link_ksettings,
+};
+
+const struct ethtool_ops qeth_osn_ethtool_ops = {
+	.get_strings = qeth_get_strings,
+	.get_ethtool_stats = qeth_get_ethtool_stats,
+	.get_sset_count = qeth_get_sset_count,
+	.get_drvinfo = qeth_get_drvinfo,
+};
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 8931dd6e2caa..dbcba77fd94c 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -696,22 +696,6 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 		unregister_netdev(card->dev);
 }
 
-static const struct ethtool_ops qeth_l2_ethtool_ops = {
-	.get_link = ethtool_op_get_link,
-	.get_strings = qeth_core_get_strings,
-	.get_ethtool_stats = qeth_core_get_ethtool_stats,
-	.get_sset_count = qeth_core_get_sset_count,
-	.get_drvinfo = qeth_core_get_drvinfo,
-	.get_link_ksettings = qeth_core_ethtool_get_link_ksettings,
-};
-
-static const struct ethtool_ops qeth_l2_osn_ops = {
-	.get_strings = qeth_core_get_strings,
-	.get_ethtool_stats = qeth_core_get_ethtool_stats,
-	.get_sset_count = qeth_core_get_sset_count,
-	.get_drvinfo = qeth_core_get_drvinfo,
-};
-
 static const struct net_device_ops qeth_l2_netdev_ops = {
 	.ndo_open		= qeth_open,
 	.ndo_stop		= qeth_stop,
@@ -735,13 +719,10 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok)
 
 	card->dev->priv_flags |= IFF_UNICAST_FLT;
 	card->dev->netdev_ops = &qeth_l2_netdev_ops;
-	if (card->info.type == QETH_CARD_TYPE_OSN) {
-		card->dev->ethtool_ops = &qeth_l2_osn_ops;
+	if (IS_OSN(card))
 		card->dev->flags |= IFF_NOARP;
-	} else {
-		card->dev->ethtool_ops = &qeth_l2_ethtool_ops;
+	else
 		card->dev->needed_headroom = sizeof(struct qeth_hdr);
-	}
 
 	if (IS_OSM(card)) {
 		card->dev->features |= NETIF_F_VLAN_CHALLENGED;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 012a290f1f4b..40c7d53f5512 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2117,15 +2117,6 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
-static const struct ethtool_ops qeth_l3_ethtool_ops = {
-	.get_link = ethtool_op_get_link,
-	.get_strings = qeth_core_get_strings,
-	.get_ethtool_stats = qeth_core_get_ethtool_stats,
-	.get_sset_count = qeth_core_get_sset_count,
-	.get_drvinfo = qeth_core_get_drvinfo,
-	.get_link_ksettings = qeth_core_ethtool_get_link_ksettings,
-};
-
 /*
  * we need NOARP for IPv4 but we want neighbor solicitation for IPv6. Setting
  * NOARP on the netdevice is no option because it also turns off neighbor
@@ -2247,7 +2238,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok)
 		return -ENODEV;
 
 	card->dev->needed_headroom = headroom;
-	card->dev->ethtool_ops = &qeth_l3_ethtool_ops;
 	card->dev->features |=	NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;
-- 
2.16.4


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox