Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 1/2] bpf samples: fix compiler errors with sockex2 and sockex3
From: Naveen N. Rao @ 2016-09-23 20:40 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, netdev, David S. Miller
  Cc: Michael Ellerman, Alexei Starovoitov, Daniel Borkmann,
	Ananth N Mavinakayanahalli

These samples fail to compile because their local 'struct flow_keys'
conflicts with the definition in net/flow_dissector.h. Fix this by
renaming the structure used in the samples.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 samples/bpf/sockex2_kern.c | 10 +++++-----
 samples/bpf/sockex3_kern.c |  8 ++++----
 samples/bpf/sockex3_user.c |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index ba0e177..44e5846 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -14,7 +14,7 @@ struct vlan_hdr {
 	__be16 h_vlan_encapsulated_proto;
 };
 
-struct flow_keys {
+struct bpf_flow_keys {
 	__be32 src;
 	__be32 dst;
 	union {
@@ -59,7 +59,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
 }
 
 static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
-			     struct flow_keys *flow)
+			     struct bpf_flow_keys *flow)
 {
 	__u64 verlen;
 
@@ -83,7 +83,7 @@ static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto
 }
 
 static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
-			       struct flow_keys *flow)
+			       struct bpf_flow_keys *flow)
 {
 	*ip_proto = load_byte(skb,
 			      nhoff + offsetof(struct ipv6hdr, nexthdr));
@@ -96,7 +96,7 @@ static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_pro
 	return nhoff;
 }
 
-static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow)
+static inline bool flow_dissector(struct __sk_buff *skb, struct bpf_flow_keys *flow)
 {
 	__u64 nhoff = ETH_HLEN;
 	__u64 ip_proto;
@@ -198,7 +198,7 @@ struct bpf_map_def SEC("maps") hash_map = {
 SEC("socket2")
 int bpf_prog2(struct __sk_buff *skb)
 {
-	struct flow_keys flow;
+	struct bpf_flow_keys flow;
 	struct pair *value;
 	u32 key;
 
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 41ae2fd..95907f8 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -61,7 +61,7 @@ struct vlan_hdr {
 	__be16 h_vlan_encapsulated_proto;
 };
 
-struct flow_keys {
+struct bpf_flow_keys {
 	__be32 src;
 	__be32 dst;
 	union {
@@ -88,7 +88,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
 }
 
 struct globals {
-	struct flow_keys flow;
+	struct bpf_flow_keys flow;
 };
 
 struct bpf_map_def SEC("maps") percpu_map = {
@@ -114,14 +114,14 @@ struct pair {
 
 struct bpf_map_def SEC("maps") hash_map = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct flow_keys),
+	.key_size = sizeof(struct bpf_flow_keys),
 	.value_size = sizeof(struct pair),
 	.max_entries = 1024,
 };
 
 static void update_stats(struct __sk_buff *skb, struct globals *g)
 {
-	struct flow_keys key = g->flow;
+	struct bpf_flow_keys key = g->flow;
 	struct pair *value;
 
 	value = bpf_map_lookup_elem(&hash_map, &key);
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index d4184ab..3fcfd8c4 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -7,7 +7,7 @@
 #include <arpa/inet.h>
 #include <sys/resource.h>
 
-struct flow_keys {
+struct bpf_flow_keys {
 	__be32 src;
 	__be32 dst;
 	union {
@@ -49,7 +49,7 @@ int main(int argc, char **argv)
 	(void) f;
 
 	for (i = 0; i < 5; i++) {
-		struct flow_keys key = {}, next_key;
+		struct bpf_flow_keys key = {}, next_key;
 		struct pair value;
 
 		sleep(1);
-- 
2.9.3

^ permalink raw reply related

* [PATCH 2/2] bpf samples: update tracex5 sample to use __seccomp_filter
From: Naveen N. Rao @ 2016-09-23 20:40 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, netdev, David S. Miller
  Cc: Michael Ellerman, Alexei Starovoitov, Daniel Borkmann,
	Ananth N Mavinakayanahalli
In-Reply-To: <7b29fba6073924dc4c9e7d639eefb0b15a98660d.1474661952.git.naveen.n.rao@linux.vnet.ibm.com>

seccomp_phase1() does not exist anymore. Instead, update sample to use
__seccomp_filter(). While at it, set max locked memory to unlimited.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
I am not completely sure if __seccomp_filter is the right place to hook
in. This works for me though. Please review.

Thanks,
Naveen


 samples/bpf/tracex5_kern.c | 16 +++++++---------
 samples/bpf/tracex5_user.c |  3 +++
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index f95f232..fd12d71 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -19,20 +19,18 @@ struct bpf_map_def SEC("maps") progs = {
 	.max_entries = 1024,
 };
 
-SEC("kprobe/seccomp_phase1")
+SEC("kprobe/__seccomp_filter")
 int bpf_prog1(struct pt_regs *ctx)
 {
-	struct seccomp_data sd;
-
-	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+	int sc_nr = (int)PT_REGS_PARM1(ctx);
 
 	/* dispatch into next BPF program depending on syscall number */
-	bpf_tail_call(ctx, &progs, sd.nr);
+	bpf_tail_call(ctx, &progs, sc_nr);
 
 	/* fall through -> unknown syscall */
-	if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) {
+	if (sc_nr >= __NR_getuid && sc_nr <= __NR_getsid) {
 		char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n";
-		bpf_trace_printk(fmt, sizeof(fmt), sd.nr);
+		bpf_trace_printk(fmt, sizeof(fmt), sc_nr);
 	}
 	return 0;
 }
@@ -42,7 +40,7 @@ PROG(__NR_write)(struct pt_regs *ctx)
 {
 	struct seccomp_data sd;
 
-	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
 	if (sd.args[2] == 512) {
 		char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
 		bpf_trace_printk(fmt, sizeof(fmt),
@@ -55,7 +53,7 @@ PROG(__NR_read)(struct pt_regs *ctx)
 {
 	struct seccomp_data sd;
 
-	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
 	if (sd.args[2] > 128 && sd.args[2] <= 1024) {
 		char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
 		bpf_trace_printk(fmt, sizeof(fmt),
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index a04dd3c..36b5925 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -6,6 +6,7 @@
 #include <sys/prctl.h>
 #include "libbpf.h"
 #include "bpf_load.h"
+#include <sys/resource.h>
 
 /* install fake seccomp program to enable seccomp code path inside the kernel,
  * so that our kprobe attached to seccomp_phase1() can be triggered
@@ -27,8 +28,10 @@ int main(int ac, char **argv)
 {
 	FILE *f;
 	char filename[256];
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	setrlimit(RLIMIT_MEMLOCK, &r);
 
 	if (load_bpf_file(filename)) {
 		printf("%s", bpf_log_buf);
-- 
2.9.3

^ permalink raw reply related

* Re: [RFC] net: store port/representative id in metadata_dst
From: Jakub Kicinski @ 2016-09-23 20:45 UTC (permalink / raw)
  To: John Fastabend
  Cc: Jakub Kicinski, Samudrala, Sridhar, Jiri Benc, Jiri Pirko, netdev,
	Thomas Graf, Roopa Prabhu, ogerlitz, ast, daniel, simon.horman,
	Paolo Abeni, Pravin B Shelar, hannes
In-Reply-To: <57E58FA6.3050001@gmail.com>

On Fri, 23 Sep 2016 13:25:10 -0700, John Fastabend wrote:
> On 16-09-23 01:17 PM, Jakub Kicinski wrote:
> > On Fri, 23 Sep 2016 10:22:59 -0700, Samudrala, Sridhar wrote:  
> >> On 9/23/2016 8:29 AM, Jakub Kicinski wrote:  
>  [...]  
>  [...]  
> >>
> >> The 'accel' parameter in dev_queue_xmit_accel() is currently only passed
> >> to ndo_select_queue() via netdev_pick_tx() and is used to select the tx 
> >> queue.
> >> Also, it is not passed all the way to the driver specific xmit routine.  
> >> Doesn't it require
> >> changing all the driver xmit routines if we want to pass this parameter?
> >>  
>  [...]  
> >>
> >> Yes.  The VFPR netdevs don't have any HW queues associated with them and 
> >> we would like
> >> to use the PF queues for the xmit.
> >> I was also looking into some way of passing the port id via skb 
> >> parameter to the
> >> dev_queue_xmit() call so that the PF xmit routine can do a directed 
> >> transmit to a specific VF.
> >> Is skb->cb an option to pass this info?
> >> dst_metadata approach would work  too if it is acceptable.  
> > 
> > I don't think we can trust skb->cb to be set to anything meaningful
> > when the skb is received by the lower device. 
> 
> Agreed. I wouldn't recommend using skb->cb. How about passing it through
> dev_queue_xmit_accel() through to the driver?
> 
> If you pass the metadata through the dev_queue_xmit_accel() handle tx
> queue  selection would work using normal mechanisms (xps, select_queue,
> cls  hook, etc.). If you wanted to pick some specific queue based on
> policy the policy could be loaded into one of those hooks.

Do you mean without extending how accel is handled by
dev_queue_xmit_accel() today?  If my goal is to not have extra HW
queues then I don't see how I could mux in the lower dev without extra
locking (as I tried to explain two emails ago).  Sorry for being slow
here :(

^ permalink raw reply

* Re: [PATCH] net: bcmgenet: Fix EPHY reset in power up
From: Jaedon Shin @ 2016-09-23 20:55 UTC (permalink / raw)
  To: Florian Fainelli; +Cc: Andrew Lunn, David Miller, Philippe Reynes, netdev
In-Reply-To: <f68f0b8c-df49-1326-c05f-ddd7f95e4209@gmail.com>

Hi Florian,

> On 24 Sep 2016, at 1:54 AM, Florian Fainelli <f.fainelli@gmail.com> wrote:
> 
> On 09/23/2016 08:04 AM, Jaedon Shin wrote:
>> Hi Andrew,
>> 
>> On 23 Sep 2016, at 11:06 PM, Andrew Lunn <andrew@lunn.ch> wrote:
>>> 
>>> On Fri, Sep 23, 2016 at 10:20:04PM +0900, Jaedon Shin wrote:
>>>> bcmgenet_mii_reset() is never run in the power-up sequence since
>>>> commit 62469c76007e ("net: ethernet: bcmgenet: use phydev from
>>>> struct net_device"). This causes extremely high latency and duplicate
>>>> packets when the interface is brought down and up repeatedly.
>>>> 
>>>> For now, adds again a private phydev for mii reset when runs power up to
>>>> open interface.
>>> 
>>> Hi Jaedon
>>> 
>>> How does this fix the issue? It sounds like you are papering over the
>>> crack, not truly fixing it.
>>> 
>>>      Andrew
>> 
>> Yes, it feels like a workaround, but I think it is needed for the v4.8
>> stable version. If we find a better way to initialize the internal PHY
>> after re-opening the interface, this patch will be dropped.
> 
> I can observe the faulting behavior with 4.8-rc7 that the link below
> fixed initially:
> 
> # ping fainelli-linux
> PING fainelli-linux (10.112.156.244): 56 data bytes
> 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.352 ms
> 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.472 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.496 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.517 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.536 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=1 ttl=61 time=1.557 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=1 ttl=61 time=752.448 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=2 ttl=61 time=1.291 ms
> 64 bytes from 10.112.156.244: seq=2 ttl=61 time=1.421 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=2 ttl=61 time=1.444 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=2 ttl=61 time=1.464 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=2 ttl=61 time=1.483 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=2 ttl=61 time=1.505 ms (DUP!)
> 64 bytes from 10.112.156.244: seq=2 ttl=61 time=24.964 ms (DUP!)
> 
> If we revert this patch, we indeed get the normal and expected behavior
> back:
> 
> # ping fainelli-linux
> PING fainelli-linux (10.112.156.244): 56 data bytes
> 64 bytes from 10.112.156.244: seq=0 ttl=61 time=0.417 ms
> 64 bytes from 10.112.156.244: seq=1 ttl=61 time=0.415 ms
> 64 bytes from 10.112.156.244: seq=2 ttl=61 time=0.424 ms
> 
> Actually, the key thing is this:
> 
> - without Philippe's patch we call twice bcmgenet_mii_reset, and that is
> intended:
> 	- first time from bcmgenet_power_up() to make sure the PHY is
> initialized *before* we get to initialize the UniMAC, this is critical
> 	- second time from bcmgenet_mii_probe(), through the normal phy_init_hw()
> 
> - with Philippe's patch, we only get to call bcmgenet_mii_reset once, in
> bcmgenet_mii_probe() because the first time in bcmgenet_power_up(),
> dev->phydev is NULL, because of a prior call to phy_disconnect() in
> bcmgenet_close(), unfortunately, there has been MAC activity, so the PHY
> gets in a bad state
> 
> Jaedon, feel free to use the explanation above, and send a plain revert
> of commit 62469c76007e11428e2ee3c6de90cbe74b588d44.
> 

Will send revert patch.

Thanks,
Jaedon

> Thanks!
> 
> Thanks!
> -- 
> Florian

^ permalink raw reply

* Re: [net-next 5/5] PCI: disable FLR for 82579 device
From: Jeff Kirsher @ 2016-09-23 21:05 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: davem, bhelgaas, Sasha Neftin, netdev, nhorman, sassmann,
	jogreene, guru.anbalagane, linux-pci
In-Reply-To: <20160923140136.GC1514@localhost>

[-- Attachment #1: Type: text/plain, Size: 3107 bytes --]

On Fri, 2016-09-23 at 09:01 -0500, Bjorn Helgaas wrote:
> On Thu, Sep 22, 2016 at 11:39:01PM -0700, Jeff Kirsher wrote:
> > 
> > From: Sasha Neftin <sasha.neftin@intel.com>
> > 
> > 82579 has a problem reattaching itself after the device is detached.
> > The bug was reported by Redhat. The suggested fix is to disable
> > FLR capability in PCIe configuration space.
> > 
> > Reproduction:
> > Attach the device to a VM, then detach and try to attach again.
> > 
> > Fix:
> > Disable FLR capability to prevent the 82579 from hanging.
> 
> Is there a bugzilla or other reference URL to include here?  Should
> this be marked for stable?

So the author is in Israel, meaning it is their weekend now.  I do not
believe Sasha monitors email over the weekend, so a response to your
questions won't happen for a few days.

I tried searching my archives for more information, but had no luck finding
any additional information.

> > Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
> > Tested-by: Aaron Brown <aaron.f.brown@intel.com>
> > Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> > ---
> >  drivers/pci/quirks.c | 21 +++++++++++++++++++++
> >  1 file changed, 21 insertions(+)
> > 
> > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> > index 44e0ff3..59fba6e 100644
> > --- a/drivers/pci/quirks.c
> > +++ b/drivers/pci/quirks.c
> > @@ -4431,3 +4431,24 @@ static void quirk_intel_qat_vf_cap(struct
> > pci_dev *pdev)
> >  	}
> >  }
> >  DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443,
> > quirk_intel_qat_vf_cap);
> > +/*
> > + * Workaround FLR issues for 82579
> > + * This code disables the FLR (Function Level Reset) via PCIe, in
> > order
> > + * to workaround a bug found while using device passthrough, where the
> > + * interface would become non-responsive.
> > + * NOTE: the FLR bit is Read/Write Once (RWO) in config space, so if
> > + * the BIOS or kernel writes this register then this workaround will
> > + * not work.
> 
> This doesn't sound like a root cause.  Is the issue a hardware
> erratum?  Linux PCI core bug?  VFIO bug?  Device firmware bug?
> 
> The changelog suggests that the problem only affects passthrough,
> which suggests some sort of kernel bug related to how passthrough is
> implemented.
> 
> > 
> > + */
> > +static void quirk_intel_flr_cap_dis(struct pci_dev *dev)
> > +{
> > +	int pos = pci_find_capability(dev, PCI_CAP_ID_AF);
> > +	if (pos) {
> > +		u8 cap;
> > +		pci_read_config_byte(dev, pos + PCI_AF_CAP, &cap);
> > +		cap = cap & (~PCI_AF_CAP_FLR);
> > +		pci_write_config_byte(dev, pos + PCI_AF_CAP, cap);
> > +	}
> > +}
> > +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502,
> > quirk_intel_flr_cap_dis);
> > +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503,
> > quirk_intel_flr_cap_dis);
> > -- 
> > 2.7.4
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* [PATCH] Revert "net: ethernet: bcmgenet: use phydev from struct net_device"
From: Jaedon Shin @ 2016-09-23 21:08 UTC (permalink / raw)
  To: Florian Fainelli, David S . Miller
  Cc: Philippe Reynes, Andrew Lunn, netdev, Jaedon Shin

This reverts commit 62469c76007e ("net: ethernet: bcmgenet: use phydev
from struct net_device")

Without commit 62469c76007e, we call bcmgenet_mii_reset() twice, and that
is intended:
- first time from bcmgenet_power_up() to make sure the PHY is initialized
  *before* we get to initialize the UniMAC, this is critical
- second time from bcmgenet_mii_probe(), through the normal phy_init_hw()

With commit 62469c76007e, we only get to call bcmgenet_mii_reset() once,
in bcmgenet_mii_probe(), because the first time, in bcmgenet_power_up(),
dev->phydev is NULL because of a prior call to phy_disconnect() in
bcmgenet_close(). Unfortunately, there has been MAC activity, so the PHY
gets into a bad state.

Signed-off-by: Jaedon Shin <jaedon.shin@gmail.com>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 45 ++++++++++++++------------
 drivers/net/ethernet/broadcom/genet/bcmgenet.h |  1 +
 drivers/net/ethernet/broadcom/genet/bcmmii.c   | 24 +++++++-------
 3 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 8d4f8495dbb3..541456398dfb 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -453,25 +453,29 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
 static int bcmgenet_get_settings(struct net_device *dev,
 				 struct ethtool_cmd *cmd)
 {
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!dev->phydev)
+	if (!priv->phydev)
 		return -ENODEV;
 
-	return phy_ethtool_gset(dev->phydev, cmd);
+	return phy_ethtool_gset(priv->phydev, cmd);
 }
 
 static int bcmgenet_set_settings(struct net_device *dev,
 				 struct ethtool_cmd *cmd)
 {
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!dev->phydev)
+	if (!priv->phydev)
 		return -ENODEV;
 
-	return phy_ethtool_sset(dev->phydev, cmd);
+	return phy_ethtool_sset(priv->phydev, cmd);
 }
 
 static int bcmgenet_set_rx_csum(struct net_device *dev,
@@ -937,7 +941,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e)
 	e->eee_active = p->eee_active;
 	e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER);
 
-	return phy_ethtool_get_eee(dev->phydev, e);
+	return phy_ethtool_get_eee(priv->phydev, e);
 }
 
 static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -954,7 +958,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 	if (!p->eee_enabled) {
 		bcmgenet_eee_enable_set(dev, false);
 	} else {
-		ret = phy_init_eee(dev->phydev, 0);
+		ret = phy_init_eee(priv->phydev, 0);
 		if (ret) {
 			netif_err(priv, hw, dev, "EEE initialization failed\n");
 			return ret;
@@ -964,12 +968,14 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 		bcmgenet_eee_enable_set(dev, true);
 	}
 
-	return phy_ethtool_set_eee(dev->phydev, e);
+	return phy_ethtool_set_eee(priv->phydev, e);
 }
 
 static int bcmgenet_nway_reset(struct net_device *dev)
 {
-	return genphy_restart_aneg(dev->phydev);
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+
+	return genphy_restart_aneg(priv->phydev);
 }
 
 /* standard ethtool support functions. */
@@ -996,13 +1002,12 @@ static struct ethtool_ops bcmgenet_ethtool_ops = {
 static int bcmgenet_power_down(struct bcmgenet_priv *priv,
 				enum bcmgenet_power_mode mode)
 {
-	struct net_device *ndev = priv->dev;
 	int ret = 0;
 	u32 reg;
 
 	switch (mode) {
 	case GENET_POWER_CABLE_SENSE:
-		phy_detach(ndev->phydev);
+		phy_detach(priv->phydev);
 		break;
 
 	case GENET_POWER_WOL_MAGIC:
@@ -1063,6 +1068,7 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv,
 /* ioctl handle special commands that are not present in ethtool. */
 static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
+	struct bcmgenet_priv *priv = netdev_priv(dev);
 	int val = 0;
 
 	if (!netif_running(dev))
@@ -1072,10 +1078,10 @@ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-		if (!dev->phydev)
+		if (!priv->phydev)
 			val = -ENODEV;
 		else
-			val = phy_mii_ioctl(dev->phydev, rq, cmd);
+			val = phy_mii_ioctl(priv->phydev, rq, cmd);
 		break;
 
 	default:
@@ -2458,7 +2464,6 @@ static void bcmgenet_irq_task(struct work_struct *work)
 {
 	struct bcmgenet_priv *priv = container_of(
 			work, struct bcmgenet_priv, bcmgenet_irq_work);
-	struct net_device *ndev = priv->dev;
 
 	netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
 
@@ -2471,7 +2476,7 @@ static void bcmgenet_irq_task(struct work_struct *work)
 
 	/* Link UP/DOWN event */
 	if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
-		phy_mac_interrupt(ndev->phydev,
+		phy_mac_interrupt(priv->phydev,
 				  !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
 		priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
 	}
@@ -2833,7 +2838,7 @@ static void bcmgenet_netif_start(struct net_device *dev)
 	/* Monitor link interrupts now */
 	bcmgenet_link_intr_enable(priv);
 
-	phy_start(dev->phydev);
+	phy_start(priv->phydev);
 }
 
 static int bcmgenet_open(struct net_device *dev)
@@ -2932,7 +2937,7 @@ static void bcmgenet_netif_stop(struct net_device *dev)
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 
 	netif_tx_stop_all_queues(dev);
-	phy_stop(dev->phydev);
+	phy_stop(priv->phydev);
 	bcmgenet_intr_disable(priv);
 	bcmgenet_disable_rx_napi(priv);
 	bcmgenet_disable_tx_napi(priv);
@@ -2958,7 +2963,7 @@ static int bcmgenet_close(struct net_device *dev)
 	bcmgenet_netif_stop(dev);
 
 	/* Really kill the PHY state machine and disconnect from it */
-	phy_disconnect(dev->phydev);
+	phy_disconnect(priv->phydev);
 
 	/* Disable MAC receive */
 	umac_enable_set(priv, CMD_RX_EN, false);
@@ -3517,7 +3522,7 @@ static int bcmgenet_suspend(struct device *d)
 
 	bcmgenet_netif_stop(dev);
 
-	phy_suspend(dev->phydev);
+	phy_suspend(priv->phydev);
 
 	netif_device_detach(dev);
 
@@ -3581,7 +3586,7 @@ static int bcmgenet_resume(struct device *d)
 	if (priv->wolopts)
 		clk_disable_unprepare(priv->clk_wol);
 
-	phy_init_hw(dev->phydev);
+	phy_init_hw(priv->phydev);
 	/* Speed settings must be restored */
 	bcmgenet_mii_config(priv->dev);
 
@@ -3614,7 +3619,7 @@ static int bcmgenet_resume(struct device *d)
 
 	netif_device_attach(dev);
 
-	phy_resume(dev->phydev);
+	phy_resume(priv->phydev);
 
 	if (priv->eee.eee_enabled)
 		bcmgenet_eee_enable_set(dev, true);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 0f0868c56f05..1e2dc34d331a 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -597,6 +597,7 @@ struct bcmgenet_priv {
 
 	/* MDIO bus variables */
 	wait_queue_head_t wq;
+	struct phy_device *phydev;
 	bool internal_phy;
 	struct device_node *phy_dn;
 	struct device_node *mdio_dn;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index e907acd81da9..457c3bc8cfff 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -86,7 +86,7 @@ static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id,
 void bcmgenet_mii_setup(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = dev->phydev;
+	struct phy_device *phydev = priv->phydev;
 	u32 reg, cmd_bits = 0;
 	bool status_changed = false;
 
@@ -183,9 +183,9 @@ void bcmgenet_mii_reset(struct net_device *dev)
 	if (GENET_IS_V4(priv))
 		return;
 
-	if (dev->phydev) {
-		phy_init_hw(dev->phydev);
-		phy_start_aneg(dev->phydev);
+	if (priv->phydev) {
+		phy_init_hw(priv->phydev);
+		phy_start_aneg(priv->phydev);
 	}
 }
 
@@ -236,7 +236,6 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev)
 
 static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
 {
-	struct net_device *ndev = priv->dev;
 	u32 reg;
 
 	/* Speed settings are set in bcmgenet_mii_setup() */
@@ -245,14 +244,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
 	bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL);
 
 	if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET)
-		fixed_phy_set_link_update(ndev->phydev,
+		fixed_phy_set_link_update(priv->phydev,
 					  bcmgenet_fixed_phy_link_update);
 }
 
 int bcmgenet_mii_config(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = dev->phydev;
+	struct phy_device *phydev = priv->phydev;
 	struct device *kdev = &priv->pdev->dev;
 	const char *phy_name = NULL;
 	u32 id_mode_dis = 0;
@@ -303,7 +302,7 @@ int bcmgenet_mii_config(struct net_device *dev)
 		 * capabilities, use that knowledge to also configure the
 		 * Reverse MII interface correctly.
 		 */
-		if ((phydev->supported & PHY_BASIC_FEATURES) ==
+		if ((priv->phydev->supported & PHY_BASIC_FEATURES) ==
 				PHY_BASIC_FEATURES)
 			port_ctrl = PORT_MODE_EXT_RVMII_25;
 		else
@@ -372,7 +371,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 			return -ENODEV;
 		}
 	} else {
-		phydev = dev->phydev;
+		phydev = priv->phydev;
 		phydev->dev_flags = phy_flags;
 
 		ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
@@ -383,6 +382,8 @@ int bcmgenet_mii_probe(struct net_device *dev)
 		}
 	}
 
+	priv->phydev = phydev;
+
 	/* Configure port multiplexer based on what the probed PHY device since
 	 * reading the 'max-speed' property determines the maximum supported
 	 * PHY speed which is needed for bcmgenet_mii_config() to configure
@@ -390,7 +391,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 	 */
 	ret = bcmgenet_mii_config(dev);
 	if (ret) {
-		phy_disconnect(phydev);
+		phy_disconnect(priv->phydev);
 		return ret;
 	}
 
@@ -400,7 +401,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 	 * Ethernet MAC ISRs
 	 */
 	if (priv->internal_phy)
-		phydev->irq = PHY_IGNORE_INTERRUPT;
+		priv->phydev->irq = PHY_IGNORE_INTERRUPT;
 
 	return 0;
 }
@@ -605,6 +606,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
 
 	}
 
+	priv->phydev = phydev;
 	priv->phy_interface = pd->phy_interface;
 
 	return 0;
-- 
2.10.0

^ permalink raw reply related

* Re: [PATCH] softirq: let ksoftirqd do its job
From: Peter Zijlstra @ 2016-09-23 21:16 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Daniel Borkmann, David Miller, eric.dumazet, riel, pabeni, hannes,
	linux-kernel, netdev, corbet, Ingo Molnar
In-Reply-To: <20160923185104.535813b4@redhat.com>

On Fri, Sep 23, 2016 at 06:51:04PM +0200, Jesper Dangaard Brouer wrote:

> This is your git tree, right:
>  https://git.kernel.org/cgit/linux/kernel/git/peterz/queue.git/
> 
> Doesn't look like you pushed it yet, or do I need to look at a specific
> branch?

I mainly work from a local quilt queue which I feed to mingo. I
occasionally push out to get build-bot coverage or have people look at
bits I poked together.

That said, I'll try and do a push later tonight.

Do note however, that git tree is a complete wipe and rebuild, don't
expect any kind of continuity from it.

^ permalink raw reply

* Re: [RFC] net: store port/representative id in metadata_dst
From: John Fastabend @ 2016-09-23 21:20 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Jakub Kicinski, Samudrala, Sridhar, Jiri Benc, Jiri Pirko, netdev,
	Thomas Graf, Roopa Prabhu, ogerlitz, ast, daniel, simon.horman,
	Paolo Abeni, Pravin B Shelar, hannes
In-Reply-To: <20160923214544.6a3cd0ff@laptop>

On 16-09-23 01:45 PM, Jakub Kicinski wrote:
> On Fri, 23 Sep 2016 13:25:10 -0700, John Fastabend wrote:
>> On 16-09-23 01:17 PM, Jakub Kicinski wrote:
>>> On Fri, 23 Sep 2016 10:22:59 -0700, Samudrala, Sridhar wrote:  
>>>> On 9/23/2016 8:29 AM, Jakub Kicinski wrote:  
>>  [...]  
>>  [...]  
>>>>
>>>> The 'accel' parameter in dev_queue_xmit_accel() is currently only passed
>>>> to ndo_select_queue() via netdev_pick_tx() and is used to select the tx 
>>>> queue.
>>>> Also, it is not passed all the way to the driver specific xmit routine.  
>>>> Doesn't it require
>>>> changing all the driver xmit routines if we want to pass this parameter?
>>>>  
>>  [...]  
>>>>
>>>> Yes.  The VFPR netdevs don't have any HW queues associated with them and 
>>>> we would like
>>>> to use the PF queues for the xmit.
>>>> I was also looking into some way of passing the port id via skb 
>>>> parameter to the
>>>> dev_queue_xmit() call so that the PF xmit routine can do a directed 
>>>> transmit to a specific VF.
>>>> Is skb->cb an option to pass this info?
>>>> dst_metadata approach would work  too if it is acceptable.  
>>>
>>> I don't think we can trust skb->cb to be set to anything meaningful
>>> when the skb is received by the lower device. 
>>
>> Agreed. I wouldn't recommend using skb->cb. How about passing it through
>> dev_queue_xmit_accel() through to the driver?
>>
>> If you pass the metadata through the dev_queue_xmit_accel() handle tx
>> queue  selection would work using normal mechanisms (xps, select_queue,
>> cls  hook, etc.). If you wanted to pick some specific queue based on
>> policy the policy could be loaded into one of those hooks.
> 
> Do you mean without extending how accel is handled by
> dev_queue_xmit_accel() today?  If my goal is to not have extra HW
> queues then I don't see how I could mux in the lower dev without extra
> locking (as I tried to explain two emails ago).  Sorry for being slow
> here :(
> 

Not slow here I think I was overly optimistic...

Yeh let me try this, roughly the current flow is,

   dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
   __dev_queue_xmit(skb, accel_priv);
   netdev_pick_tx(dev, skb, accel_priv);
	ndo_select_queue(dev, skb, accel_priv, ...);
   [...]
   q->enqueue();
   [...]
   dev_hard_start_xmit();
   [...]
    <driver code here>

So in this flow the VFR netdev driver handles its xmit routine by
calling dev_queue_xmit_accel after setting skb->dev to the physical
device and passing a cookie via accel that the select_queue() routine
can use to pick a tx queue. The rest of the stack q->enqueue() and
friends will ensure that locking and qdisc is handled correctly.

But accel_priv was lost at queue selection and so it's not being passed
down to the driver so no way to set your descriptor bits or whatever
needed to push to the VF. I was sort of thinking we could map it from
the select_queue routine but I can't figure out how to do that either.

The metadata idea doesn't seem that bad now that I've spent some more
time going through it. Either that or hijack some field in the skb but
I think that might be worse than the proposal here.

I'm trying to think up some other alternative now and will let you know
if I think of anything clever but got nothing at the moment.

.John
	

^ permalink raw reply

* Re: [PATCH net] i40e: fix call of ndo_dflt_bridge_getlink()
From: Jeff Kirsher @ 2016-09-23 21:37 UTC (permalink / raw)
  To: nicolas.dichtel
  Cc: davem, netdev, Huaibin Wang, Scott Feldman, Carolyn Wyborny,
	Catherine Sullivan
In-Reply-To: <60c90efd-fab3-84cd-f6e1-f6bc710c5ab7@6wind.com>

[-- Attachment #1: Type: text/plain, Size: 1259 bytes --]

On Fri, 2016-09-23 at 11:12 +0200, Nicolas Dichtel wrote:
> Le 19/09/2016 à 18:14, Nicolas Dichtel a écrit :
> > From: Huaibin Wang <huaibin.wang@6wind.com>
> > 
> > Order of arguments is wrong.
> > The wrong code has been introduced by commit 7d4f8d871ab1, but is compiled
> > only since commit 9df70b66418e.
> > 
> > Note that this may break netlink dumps.
> > 
> > Fixes: 9df70b66418e ("i40e: Remove incorrect #ifdef's")
> > Fixes: 7d4f8d871ab1 ("switchdev; add VLAN support for port's bridge_getlink")
> > CC: Scott Feldman <sfeldma@gmail.com>
> > CC: Carolyn Wyborny <carolyn.wyborny@intel.com>
> > CC: Catherine Sullivan <catherine.sullivan@intel.com>
> > Signed-off-by: Huaibin Wang <huaibin.wang@6wind.com>
> > Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Hi Jeff,
> 
> any news about this patch? David has marked it "awaiting upstream" on the
> patchwork, so I understand it should go to your tree.

Yes, it needs to go through my tree.  Please send it to the
intel-wired-lan@lists.osuosl.org mailing list, that way I can track it
through our patchwork projects.  Also you can trim Scott Feldman and
Catherine Sullivan from the CC list.

http://patchwork.ozlabs.org/project/intel-wired-lan/list/

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH 3/3] bpf powerpc: add support for bpf constant blinding
From: Daniel Borkmann @ 2016-09-23 21:40 UTC (permalink / raw)
  To: Naveen N. Rao, linux-kernel, linuxppc-dev, netdev,
	Michael Ellerman
  Cc: Alexei Starovoitov, David S. Miller, Ananth N Mavinakayanahalli
In-Reply-To: <0ecead168c80b1c3d8a8101595e689ff7c7a735f.1474661927.git.naveen.n.rao@linux.vnet.ibm.com>

On 09/23/2016 10:35 PM, Naveen N. Rao wrote:
> In line with similar support for other architectures by Daniel Borkmann.
>
> 'MOD Default X' from test_bpf without constant blinding:
> 84 bytes emitted from JIT compiler (pass:3, flen:7)
> d0000000058a4688 + <x>:
>     0:	nop
>     4:	nop
>     8:	std     r27,-40(r1)
>     c:	std     r28,-32(r1)
>    10:	xor     r8,r8,r8
>    14:	xor     r28,r28,r28
>    18:	mr      r27,r3
>    1c:	li      r8,66
>    20:	cmpwi   r28,0
>    24:	bne     0x0000000000000030
>    28:	li      r8,0
>    2c:	b       0x0000000000000044
>    30:	divwu   r9,r8,r28
>    34:	mullw   r9,r28,r9
>    38:	subf    r8,r9,r8
>    3c:	rotlwi  r8,r8,0
>    40:	li      r8,66
>    44:	ld      r27,-40(r1)
>    48:	ld      r28,-32(r1)
>    4c:	mr      r3,r8
>    50:	blr
>
> ... and with constant blinding:
> 140 bytes emitted from JIT compiler (pass:3, flen:11)
> d00000000bd6ab24 + <x>:
>     0:	nop
>     4:	nop
>     8:	std     r27,-40(r1)
>     c:	std     r28,-32(r1)
>    10:	xor     r8,r8,r8
>    14:	xor     r28,r28,r28
>    18:	mr      r27,r3
>    1c:	lis     r2,-22834
>    20:	ori     r2,r2,36083
>    24:	rotlwi  r2,r2,0
>    28:	xori    r2,r2,36017
>    2c:	xoris   r2,r2,42702
>    30:	rotlwi  r2,r2,0
>    34:	mr      r8,r2
>    38:	rotlwi  r8,r8,0
>    3c:	cmpwi   r28,0
>    40:	bne     0x000000000000004c
>    44:	li      r8,0
>    48:	b       0x000000000000007c
>    4c:	divwu   r9,r8,r28
>    50:	mullw   r9,r28,r9
>    54:	subf    r8,r9,r8
>    58:	rotlwi  r8,r8,0
>    5c:	lis     r2,-17137
>    60:	ori     r2,r2,39065
>    64:	rotlwi  r2,r2,0
>    68:	xori    r2,r2,39131
>    6c:	xoris   r2,r2,48399
>    70:	rotlwi  r2,r2,0
>    74:	mr      r8,r2
>    78:	rotlwi  r8,r8,0
>    7c:	ld      r27,-40(r1)
>    80:	ld      r28,-32(r1)
>    84:	mr      r3,r8
>    88:	blr
>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply

* Re: [PATCH net-next v2] Documentation: devicetree: revise ethernet device-tree binding about TRGMII
From: Rob Herring @ 2016-09-23 21:48 UTC (permalink / raw)
  To: sean.wang-NuS5LvNUpcJWk0Htik3J/w
  Cc: davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	sergei.shtylyov-M4DtvfQ/ZS1MRgGoP+s0PdBPR1lH4CV8,
	nbd-p3rKhJxN3npAfugRpC6u6w, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	john-Pj+rj9U5foFAfugRpC6u6w, keyhaede-Re5JQEeQqe8AvxtiuMwx3w,
	objelf-Re5JQEeQqe8AvxtiuMwx3w
In-Reply-To: <1474610649-18582-1-git-send-email-sean.wang-NuS5LvNUpcJWk0Htik3J/w@public.gmane.org>

On Fri, Sep 23, 2016 at 02:04:09PM +0800, sean.wang-NuS5LvNUpcJWk0Htik3J/w@public.gmane.org wrote:
> From: Sean Wang <sean.wang-NuS5LvNUpcJWk0Htik3J/w@public.gmane.org>
> 
> add phy-mode "trgmii" to
> Documentation/devicetree/bindings/net/ethernet.txt
> 
> Cc: devicetree-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Reported-by: Sergei Shtylyov <sergei.shtylyov-M4DtvfQ/ZS1MRgGoP+s0PdBPR1lH4CV8@public.gmane.org>
> Signed-off-by: Sean Wang <sean.wang-NuS5LvNUpcJWk0Htik3J/w@public.gmane.org>
> ---
>  Documentation/devicetree/bindings/net/ethernet.txt | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

Acked-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next] Documentation: devicetree: fix typo in MediaTek ethernet device-tree binding
From: Rob Herring @ 2016-09-23 21:48 UTC (permalink / raw)
  To: sean.wang
  Cc: davem, sergei.shtylyov, nbd, netdev, linux-kernel, devicetree,
	linux-mediatek, john, keyhaede, objelf
In-Reply-To: <1474610972-14974-1-git-send-email-sean.wang@mediatek.com>

On Fri, Sep 23, 2016 at 02:09:32PM +0800, sean.wang@mediatek.com wrote:
> From: Sean Wang <sean.wang@mediatek.com>
> 
> fix typo in
> Documentation/devicetree/bindings/net/mediatek-net.txt
> 
> Cc: devicetree@vger.kernel.org
> Reported-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
> Signed-off-by: Sean Wang <sean.wang@mediatek.com>
> ---
>  Documentation/devicetree/bindings/net/mediatek-net.txt | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* [PATCH] hv_netvsc: fix comments
From: sthemmin @ 2016-09-24  0:08 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, davem; +Cc: netdev, Stephen Hemminger

From: Stephen Hemminger <sthemmin@microsoft.com>

Typos and spelling errors. Also remove old comment from staging era.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h |    7 +++----
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 284b97b..d7c1cc6 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -433,7 +433,7 @@ struct nvsp_1_message_revoke_send_buffer {
  */
 struct nvsp_1_message_send_rndis_packet {
 	/*
-	 * This field is specified by RNIDS. They assume there's two different
+	 * This field is specified by RNDIS. They assume there's two different
 	 * channels of communication. However, the Network VSP only has one.
 	 * Therefore, the channel travels with the RNDIS packet.
 	 */
@@ -578,7 +578,7 @@ struct nvsp_5_send_indirect_table {
 	/* The number of entries in the send indirection table */
 	u32 count;
 
-	/* The offset of the send indireciton table from top of this struct.
+	/* The offset of the send indirection table from top of this struct.
 	 * The send indirection table tells which channel to put the send
 	 * traffic on. Each entry is a channel number.
 	 */
@@ -733,7 +733,6 @@ struct netvsc_device {
 	struct nvsp_message channel_init_pkt;
 
 	struct nvsp_message revoke_packet;
-	/* unsigned char HwMacAddr[HW_MACADDR_LEN]; */
 
 	struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX];
 	u32 send_table[VRSS_SEND_TAB_SIZE];
@@ -1238,7 +1237,7 @@ struct rndis_message {
 	u32 ndis_msg_type;
 
 	/* Total length of this message, from the beginning */
-	/* of the sruct rndis_message, in bytes. */
+	/* of the struct rndis_message, in bytes. */
 	u32 msg_len;
 
 	/* Actual message */
-- 
1.7.4.1

^ permalink raw reply related

* [PATCH] netfilter: don't permit unprivileged writes to global state via sysctls
From: Jann Horn @ 2016-09-23 22:21 UTC (permalink / raw)
  To: David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI
  Cc: netdev, netfilter-devel

This prevents the modification of nf_conntrack_max in unprivileged network
namespaces. For unprivileged network namespaces, ip_conntrack_max is kept
as a readonly sysctl in order to minimize potential compatibility issues.

This patch should apply cleanly to the net tree.

Signed-off-by: Jann Horn <jann@thejh.net>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index ae1a71a..a639e94 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -358,6 +358,9 @@ static int ipv4_init_net(struct net *net)
 	if (!in->ctl_table)
 		return -ENOMEM;
 
+	if (net->user_ns != &init_user_ns)
+		in->ctl_table[0].mode = 0444;
+
 	in->ctl_table[0].data = &nf_conntrack_max;
 	in->ctl_table[1].data = &net->ct.count;
 	in->ctl_table[2].data = &nf_conntrack_htable_size;
-- 
2.1.4

^ permalink raw reply related

* [PATCH] Net Driver: Add Cypress GX3 VID=04b4 PID=3610.
From: chris.roth-/KKvz3x1pcI @ 2016-09-23 22:24 UTC (permalink / raw)
  To: linux-usb-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: Chris Roth, Allan Chou

From: Chris Roth <chris.roth-/KKvz3x1pcI@public.gmane.org>

From: Allan Chou <allan-knRN6Y/kmf1NUHwG+Fw1Kw@public.gmane.org>

Add support for Cypress GX3 SuperSpeed to Gigabit Ethernet
Bridge Controller (Vendor=04b4 ProdID=3610).

Patch verified on x64 linux kernel 4.7.4 system with the
Kensington SD4600P USB-C Universal Dock with Power, which uses the
Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller.

A similar patch was signed-off and tested-by Allan Chou
<allan-knRN6Y/kmf1NUHwG+Fw1Kw@public.gmane.org> on 2015-12-01.

Allan verified his similar patch on x86 Linux kernel 4.1.6 system
with Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller.

Tested-by: Allan Chou <allan-knRN6Y/kmf1NUHwG+Fw1Kw@public.gmane.org>
Tested-by: Chris Roth <chris.roth-/KKvz3x1pcI@public.gmane.org>

Signed-off-by: Allan Chou <allan-knRN6Y/kmf1NUHwG+Fw1Kw@public.gmane.org>
Signed-off-by: Chris Roth <chris.roth-/KKvz3x1pcI@public.gmane.org>
---
 drivers/net/usb/ax88179_178a.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index e6338c1..8a6675d 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1656,6 +1656,19 @@ static const struct driver_info ax88178a_info = {
 	.tx_fixup = ax88179_tx_fixup,
 };
 
+static const struct driver_info cypress_GX3_info = {
+	.description = "Cypress GX3 SuperSpeed to Gigabit Ethernet Controller",
+	.bind = ax88179_bind,
+	.unbind = ax88179_unbind,
+	.status = ax88179_status,
+	.link_reset = ax88179_link_reset,
+	.reset = ax88179_reset,
+	.stop = ax88179_stop,
+	.flags = FLAG_ETHER | FLAG_FRAMING_AX,
+	.rx_fixup = ax88179_rx_fixup,
+	.tx_fixup = ax88179_tx_fixup,
+};
+
 static const struct driver_info dlink_dub1312_info = {
 	.description = "D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter",
 	.bind = ax88179_bind,
@@ -1718,6 +1731,10 @@ static const struct usb_device_id products[] = {
 	USB_DEVICE(0x0b95, 0x178a),
 	.driver_info = (unsigned long)&ax88178a_info,
 }, {
+	/* Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller */
+	USB_DEVICE(0x04b4, 0x3610),
+	.driver_info = (unsigned long)&cypress_GX3_info,
+}, {
 	/* D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter */
 	USB_DEVICE(0x2001, 0x4a00),
 	.driver_info = (unsigned long)&dlink_dub1312_info,
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH 2/3] bpf powerpc: implement support for tail calls
From: Daniel Borkmann @ 2016-09-23 22:33 UTC (permalink / raw)
  To: Naveen N. Rao, linux-kernel, linuxppc-dev, netdev,
	Michael Ellerman
  Cc: Alexei Starovoitov, David S. Miller, Ananth N Mavinakayanahalli
In-Reply-To: <c7c16b999694b2851705d79da939988e521bda3a.1474661927.git.naveen.n.rao@linux.vnet.ibm.com>

On 09/23/2016 10:35 PM, Naveen N. Rao wrote:
> Tail calls allow JIT'ed eBPF programs to call into other JIT'ed eBPF
> programs. This can be achieved either by:
> (1) retaining the stack setup by the first eBPF program and having all
> subsequent eBPF programs re-using it, or,
> (2) by unwinding/tearing down the stack and having each eBPF program
> deal with its own stack as it sees fit.
>
> To ensure that this does not create loops, there is a limit to how many
> tail calls can be done (currently 32). This requires the JIT'ed code to
> maintain a count of the number of tail calls done so far.
>
> Approach (1) is simple, but requires every eBPF program to have (almost)
> the same prologue/epilogue, regardless of whether they need it. This is
> inefficient for small eBPF programs which may not sometimes need a
> prologue at all. As such, to minimize impact of tail call
> implementation, we use approach (2) here which needs each eBPF program
> in the chain to use its own prologue/epilogue. This is not ideal when
> many tail calls are involved and when all the eBPF programs in the chain
> have similar prologue/epilogue. However, the impact is restricted to
> programs that do tail calls. Individual eBPF programs are not affected.
>
> We maintain the tail call count in a fixed location on the stack and
> updated tail call count values are passed in through this. The very
> first eBPF program in a chain sets this up to 0 (the first 2
> instructions). Subsequent tail calls skip the first two eBPF JIT
> instructions to maintain the count. For programs that don't do tail
> calls themselves, the first two instructions are NOPs.
>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>

Thanks for adding support, Naveen, that's really great! I think 2) seems
fine as well in this context as prologue size can vary quite a bit here,
and depending on program types likelihood of tail call usage as well (but
I wouldn't expect deep nesting). Thanks a lot!

^ permalink raw reply

* Re: Modification to skb->queue_mapping affecting performance
From: Michael Ma @ 2016-09-23 23:21 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <CAAmHdhwZUeKd_GKj8JdbXtdAZxn8sfbaGx+1Mjzn08rT3yf=+g@mail.gmail.com>

2016-09-16 15:00 GMT-07:00 Michael Ma <make0818@gmail.com>:
> 2016-09-16 12:53 GMT-07:00 Eric Dumazet <eric.dumazet@gmail.com>:
>> On Fri, 2016-09-16 at 10:57 -0700, Michael Ma wrote:
>>
>>> This is actually the problem - if flows from different RX queues are
>>> switched to the same RX queue in IFB, they'll use different processor
>>> context with the same tasklet, and the processor context of different
>>> tasklets might be the same. So multiple tasklets in IFB competes for
>>> the same core when queue is switched.
>>>
>>> The following simple fix proved this - with this change even switching
>>> the queue won't affect small packet bandwidth/latency anymore:
>>>
>>> in ifb.c:
>>>
>>> -       struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
>>> +       struct ifb_q_private *txp = dp->tx_private +
>>> (smp_processor_id() % dev->num_tx_queues);
>>>
>>> This should be more efficient since we're not sending the task to a
>>> different processor, instead we try to queue the packet to an
>>> appropriate tasklet based on the processor ID. Will this cause any
>>> packet out-of-order problem? If packets from the same flow are queued
>>> to the same RX queue due to RSS, and processor affinity is set for RX
>>> queues, I assume packets from the same flow will end up in the same
>>> core when tasklet is scheduled. But I might have missed some uncommon
>>> cases here... Would appreciate if anyone can provide more insights.
>>
>> Wait, don't you have proper smp affinity for the RX queues on your NIC ?
>>
>> ( Documentation/networking/scaling.txt RSS IRQ Configuration )
>>
> Yes - what I was trying to say is that this change will be more
> efficient than using smp_call_function_single() to schedule the
> tasklet to a different processor.
>
> RSS IRQ should be set properly already. The issue here is that I'll
> need to switch the queue mapping for NIC RX to a different TXQ on IFB,
> which allows me to classify the flows at the IFB TXQ layer and avoid
> qdisc lock contention.
>
> When that switch happens, ideally processor core shouldn't be switched
> because all the thread context isn't changed. The work in tasklet
> should be scheduled to the same processor as well. That's why I tried
> this change. Also conceptually IFB is a software device which should
> be able to schedule its workload independent from how NIC is
> configured for the interrupt handling.
>
>> A driver ndo_start_xmit() MUST use skb_get_queue_mapping(skb), because
>> the driver queue is locked before ndo_start_xmit())  (for non
>> NETIF_F_LLTX drivers at least)
>>
>
> Thanks a lot for pointing out this! I was expecting this kind of
> guidance... Then the options would be:
>
> 1. Use smp_call_function_single() to schedule the tasklet to a core
> statically mapped to the IFB TXQ, which is very similar to how TX/RX
> IRQ is configured.

This actually won't help with the throughput because ultimately load
will still be concentrated to some particular cores after packets are
concentrated to a TXQ due to queue level classification.

> 2. As you suggested below add some additional action to do the
> rescheduling before entering IFB - for example when receiving the
> packet we could just use RSS to redirect to the desired RXQ, however
> this doesn't seem to be easy, especially compared with the way how
> mqprio chooses the queue. The challenge here is that IFB queue
> selection is based on queue_mapping when skb arrives at IFB and core
> selection is based on RXQ on NIC and so it's also based on
> queue_mapping when skb arrives at NIC. Then these two queue_mappings
> must be the same so that there is no core conflict of processing two
> TXQs of IFB. Then this essentially means we have to change queue
> mapping of the NIC on the receiver side which can't be achieved using
> TC.
>

I tried to explore this further - there is actually XPS on ifb which
can be used to specify the processor cores that will be used to
process each TXQ of ifb, however the problem is similar as above -
eventually I'll have a few cores processing these queues instead of
having all the cores processing together with relatively light
contention. And this again reduces the throughput. So there isn't a
good place to do this. The ultimate problem is that we're trying to
work around the qdisc spin lock problem by leveraging the independence
of TXQs, but at the same time after qdisc phase we also want to
maximize the utilization of cores across whatever TXQs that are used.

>> In case of ifb, __skb_queue_tail(&txp->rq, skb); could corrupt the skb
>> list.
>>
>> In any case, you could have an action to do this before reaching IFB.
>>
>>
>>

So here is another solution - for packets coming from the NIC ingress
path the context is already a tasklet and there is no need of starting
another tasklet based on the queue selected, right? All the RQ
handling and netif_tx_stop/wakeup stuff in ifb module is unnecessary
in this case. Then we can just do transmit/receive in ifb_xmit()
directly to simplify the problem here - there is no synchronization
issue for the RQ anymore since there is no RQ indeed.

Eric, would appreciate if you can share your thoughts here.

^ permalink raw reply

* Re: [PATCH net-next] net/vxlan: Avoid unaligned access in vxlan_build_skb()
From: Sowmini Varadhan @ 2016-09-23 23:41 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: David Miller, Jiri Benc, Netdev, Hannes Frederic Sowa,
	Alexander Duyck, Daniel Borkmann, Paolo Abeni
In-Reply-To: <CAKgT0UeB5n3g4f1v_2nenjcayAi=eOPFSC7132ihXTZOztHRkw@mail.gmail.com>

On (09/23/16 10:38), Alexander Duyck wrote:
> 
> So basically what I was thinking is we do something like reserving
> NET_IP_ALIGN and continue writing headers to skb->data, but we force
> the tracking for the inner headers into frag[0] so that we can keep
> the inner headers aligned without messing up the alignment for outer
> headers.  In theory the inner offset and all that would still be
> functional but might need a few tweaks.  You could probably even use
> the skb->encapsulation bit to indicate you are doing this.  You could
> almost think of it as us doing something like the inverse of
> pskb_pull_tail.  The general idea here is we want to actually leave
> the data in skb->data, but just reference it from frag[0] so that we
> don't accidentally pull in the 2 byte padding for alignment when
> transmitting the frame.

yes, I think something along this line could do the trick.. I tried
hacking it a bit today for vxlan, and it could be extended for all
these encaps protocols. Let me fix/test this more next week, maybe
we can discuss in Tokyo.

--Sowmini

^ permalink raw reply

* Re: [PATCH net-next] net/vxlan: Avoid unaligned access in vxlan_build_skb()
From: Alexander Duyck @ 2016-09-24  0:43 UTC (permalink / raw)
  To: Sowmini Varadhan
  Cc: David Miller, Jiri Benc, Netdev, Hannes Frederic Sowa,
	Alexander Duyck, Daniel Borkmann, Paolo Abeni
In-Reply-To: <20160923234134.GA6397@oracle.com>

On Fri, Sep 23, 2016 at 4:41 PM, Sowmini Varadhan
<sowmini.varadhan@oracle.com> wrote:
> On (09/23/16 10:38), Alexander Duyck wrote:
>>
>> So basically what I was thinking is we do something like reserving
>> NET_IP_ALIGN and continue writing headers to skb->data, but we force
>> the tracking for the inner headers into frag[0] so that we can keep
>> the inner headers aligned without messing up the alignment for outer
>> headers.  In theory the inner offset and all that would still be
>> functional but might need a few tweaks.  You could probably even use
>> the skb->encapsulation bit to indicate you are doing this.  You could
>> almost think of it as us doing something like the inverse of
>> pskb_pull_tail.  The general idea here is we want to actually leave
>> the data in skb->data, but just reference it from frag[0] so that we
>> don't accidentally pull in the 2 byte padding for alignment when
>> transmitting the frame.
>
> yes, I think something along this line could do the trick.. I tried
> hacking it a bit today for vxlan, and it could be extended for all
> these encaps protocols. Let me fix/test this more next week, maybe
> we can discuss in Tokyo.

Agreed.  Keep in mind we only really need it for the architectures
that need to set NET_IP_ALIGN so we may want to end up wrapping the
code in ifndef checks for HAVE_EFFICIENT_UNALIGNED_ACCESS.

- Alex

^ permalink raw reply

* Re: Alignment issues with freescale FEC driver
From: David Miller @ 2016-09-24  2:43 UTC (permalink / raw)
  To: eric
  Cc: edumazet, linux-arm-kernel, netdev, rmk+kernel, fugang.duan,
	troy.kisky, otavio, cjb.sw.nospam
In-Reply-To: <0fe7a310-2d2f-4fca-d698-85d66122d91c@nelint.com>

From: Eric Nelson <eric@nelint.com>
Date: Fri, 23 Sep 2016 10:33:29 -0700

> Since the hardware requires longword alignment for its DMA transfers,
> aligning the IP header will require a memcpy, right?

I wish hardware designers didn't do this.

There is no conflict between DMA alignment and properly offsetting
the packet data by two bytes.

All hardware designers have to do is allow 2 padding bytes to be
emitted by the chip before the actual packet data.

Then the longword or whatever DMA transfer alignment is met
whilst still giving the necessary flexibility for where the
packet data lands.

^ permalink raw reply

* Re: Alignment issues with freescale FEC driver
From: David Miller @ 2016-09-24  2:45 UTC (permalink / raw)
  To: eric
  Cc: andrew, edumazet, fugang.duan, otavio, netdev, troy.kisky,
	rmk+kernel, cjb.sw.nospam, linux-arm-kernel
In-Reply-To: <bc19d934-f50b-50d6-0f4f-ecfcb8a2a1c9@nelint.com>

From: Eric Nelson <eric@nelint.com>
Date: Fri, 23 Sep 2016 11:35:17 -0700

> From the i.MX6DQ reference manual, bit 7 of ENET_RACC says this:
> 
> "RX FIFO Shift-16
> 
> When this field is set, the actual frame data starts at bit 16 of the first
> word read from the RX FIFO aligning the Ethernet payload on a
> 32-bit boundary."
> 
> Same for the i.MX6UL.
> 
> I'm not sure what it will take to use this, but it seems to be exactly
> what we're looking for.

+1

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH net-next v2 1/2] i40e: remove superfluous I40E_DEBUG_USER statement
From: Alexander Duyck @ 2016-09-24  2:48 UTC (permalink / raw)
  To: Stefan Assmann, David Miller; +Cc: intel-wired-lan, Netdev
In-Reply-To: <1474637458-5255-2-git-send-email-sassmann@kpanic.de>

On Fri, Sep 23, 2016 at 6:30 AM, Stefan Assmann <sassmann@kpanic.de> wrote:
> This debug statement is confusing and never set in the code. Any debug
> output should be guarded by the proper I40E_DEBUG_* statement which can
> be enabled via the debug module parameter or ethtool.
> Remove or convert the I40E_DEBUG_USER cases to I40E_DEBUG_INIT.
>
> v2: re-add setting the debug_mask in i40e_set_msglevel() so that the
> debug level can still be altered via ethtool msglvl.
>
> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
> ---
>  drivers/net/ethernet/intel/i40e/i40e_common.c  |  3 ---
>  drivers/net/ethernet/intel/i40e/i40e_debugfs.c |  6 -----
>  drivers/net/ethernet/intel/i40e/i40e_ethtool.c |  3 +--
>  drivers/net/ethernet/intel/i40e/i40e_main.c    | 35 +++++++++++++-------------
>  drivers/net/ethernet/intel/i40e/i40e_type.h    |  2 --
>  5 files changed, 18 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
> index 2154a34..8ccb09c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_common.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
> @@ -3207,9 +3207,6 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
>                         break;
>                 case I40E_AQ_CAP_ID_MSIX:
>                         p->num_msix_vectors = number;
> -                       i40e_debug(hw, I40E_DEBUG_INIT,
> -                                  "HW Capability: MSIX vector count = %d\n",
> -                                  p->num_msix_vectors);
>                         break;
>                 case I40E_AQ_CAP_ID_VF_MSIX:
>                         p->num_msix_vectors_vf = number;

I'm assuming this is dropped because you considered it redundant with
the dump in i40e_get_capabilities.  If so it would have been nice to
see this called out in your patch description somewhere as it doesn't
jibe with the rest of the patch since you are stripping something that
is using I40E_DEBUG_INIT.

> diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> index 05cf9a7..e9c6f1c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
> @@ -1210,12 +1210,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
>                 u32 level;
>                 cnt = sscanf(&cmd_buf[10], "%i", &level);
>                 if (cnt) {
> -                       if (I40E_DEBUG_USER & level) {
> -                               pf->hw.debug_mask = level;
> -                               dev_info(&pf->pdev->dev,
> -                                        "set hw.debug_mask = 0x%08x\n",
> -                                        pf->hw.debug_mask);
> -                       }
>                         pf->msg_enable = level;
>                         dev_info(&pf->pdev->dev, "set msg_enable = 0x%08x\n",
>                                  pf->msg_enable);

From what I can tell the interface is completely redundant as ethtool
can already do this.  I'd say it is okay to just remove this command
and section entirely from the debugfs interface.

> diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> index 1835186..02f55ab 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> @@ -987,8 +987,7 @@ static void i40e_set_msglevel(struct net_device *netdev, u32 data)
>         struct i40e_netdev_priv *np = netdev_priv(netdev);
>         struct i40e_pf *pf = np->vsi->back;
>
> -       if (I40E_DEBUG_USER & data)
> -               pf->hw.debug_mask = data;
> +       pf->hw.debug_mask = data;
>         pf->msg_enable = data;
>  }
>

So the way I view this is that I40E_DEBUG_USER appears to be a flag
that is being used to differentiate between some proprietary flags and
the standard msg level.  The problem is that msg_enable and debug_mask
are playing off of two completely different bit definitions.  For
example, how much sense does it make for NETIF_MSG_TX_DONE to map to
I40E_DEBUG_DCB?  If anything, what should probably happen here is
instead of dropping the if there probably needs to be an else.

This is one of many things on my list of items to fix since I have
come back to Intel.  It is just a matter of finding the time.
Basically what I would really prefer to see here is us move all of the
flags in i40e_debug_mask so that we didn't have any overlap with the
NETIF_MSG_* flags unless there is a relation between the two.

> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index 61b0fc4..56369761 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -6665,16 +6665,19 @@ static int i40e_get_capabilities(struct i40e_pf *pf)
>                 }
>         } while (err);
>
> -       if (pf->hw.debug_mask & I40E_DEBUG_USER)
> -               dev_info(&pf->pdev->dev,
> -                        "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
> -                        pf->hw.pf_id, pf->hw.func_caps.num_vfs,
> -                        pf->hw.func_caps.num_msix_vectors,
> -                        pf->hw.func_caps.num_msix_vectors_vf,
> -                        pf->hw.func_caps.fd_filters_guaranteed,
> -                        pf->hw.func_caps.fd_filters_best_effort,
> -                        pf->hw.func_caps.num_tx_qp,
> -                        pf->hw.func_caps.num_vsis);
> +       i40e_debug(&pf->hw, I40E_DEBUG_INIT,
> +                  "HW Capabilities: PF-id[%d] num_vfs=%d, msix_pf=%d, msix_vf=%d\n",
> +                  pf->hw.pf_id,
> +                  pf->hw.func_caps.num_vfs,
> +                  pf->hw.func_caps.num_msix_vectors,
> +                  pf->hw.func_caps.num_msix_vectors_vf);
> +       i40e_debug(&pf->hw, I40E_DEBUG_INIT,
> +                  "HW Capabilities: PF-id[%d] fd_g=%d, fd_b=%d, pf_max_qp=%d num_vsis=%d\n",
> +                  pf->hw.pf_id,
> +                  pf->hw.func_caps.fd_filters_guaranteed,
> +                  pf->hw.func_caps.fd_filters_best_effort,
> +                  pf->hw.func_caps.num_tx_qp,
> +                  pf->hw.func_caps.num_vsis);
>
>  #define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \
>                        + pf->hw.func_caps.num_vfs)

I'd say don't bother with this.  There isn't any point.

> @@ -8495,14 +8498,10 @@ static int i40e_sw_init(struct i40e_pf *pf)
>         int err = 0;
>         int size;
>
> -       pf->msg_enable = netif_msg_init(I40E_DEFAULT_MSG_ENABLE,
> -                               (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK));
> -       if (debug != -1 && debug != I40E_DEFAULT_MSG_ENABLE) {
> -               if (I40E_DEBUG_USER & debug)
> -                       pf->hw.debug_mask = debug;
> -               pf->msg_enable = netif_msg_init((debug & ~I40E_DEBUG_USER),
> -                                               I40E_DEFAULT_MSG_ENABLE);
> -       }
> +       pf->msg_enable = netif_msg_init(debug,
> +                                       NETIF_MSG_DRV    |
> +                                       NETIF_MSG_PROBE  |
> +                                       NETIF_MSG_LINK);
>
>         /* Set default capability flags */
>         pf->flags = I40E_FLAG_RX_CSUM_ENABLED |

Okay so I think I now see why there is confusion about how debug is
used.  The documentation in the driver is wrong for how it worked.  It
wasn't being passed as a 0-16, somebody implemented this as a 32 bit
bitmask.  So the question becomes how to fix it.  The problem is with
the patch as it is so far we end up with pf->msg_enable being
populated but pf->hw.debug_mask never being populated.  The values you
are passing as the default don't make any sense either since they
don't really map to the same functionality in I40e.  They map to
DEBUG_INIT, DEBUG_RELEASE, and an unused bit.

> diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
> index bd5f13b..7e88e35 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_type.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
> @@ -85,8 +85,6 @@ enum i40e_debug_mask {
>         I40E_DEBUG_AQ_COMMAND           = 0x06000000,
>         I40E_DEBUG_AQ                   = 0x0F000000,
>
> -       I40E_DEBUG_USER                 = 0xF0000000,
> -
>         I40E_DEBUG_ALL                  = 0xFFFFFFFF
>  };
>

This end piece is where the problem really lies.  The problem
statement for this would essentially be that the i40e driver uses the
debug module parameter in a non-standard way.  It is using a tg3 style
bitmask to populate the fields, but then documenting it and coding
part of it like it is expecting the default debug usage.  To top it
off it is doing the same kind of nonsense with the ethtool msg level
interface.

The one piece we probably need with all this in order to really "fix"
the issue and still maintain some sense of functionality would be to
look at adding something that would populate pf->hw.debug_mask.  I'm
half tempted to say that we could try adding another module parameter
named i40e_debug that we could use like tg3 does with tg3_debug and
change the debugfs interface to only modify that instead of messing
with the msg level, but the fact is that would probably just be more
confusing.  For now what I would suggest doing is just splitting
msg_enable and pf->hw.debug_mask and for now just default the value of
pf->hw.debug_mask to I40E_DEFAULT_MSG_ENABLE.  That way in a week or
two after netdev/netconf we will hopefully have had a chance to hash this
all out and can find a better way to solve this.

- Alex

^ permalink raw reply

* Re: [PATCH] Revert "net: ethernet: bcmgenet: use phydev from struct net_device"
From: David Miller @ 2016-09-24  2:51 UTC (permalink / raw)
  To: jaedon.shin; +Cc: f.fainelli, tremyfr, andrew, netdev
In-Reply-To: <20160923210819.5753-1-jaedon.shin@gmail.com>

From: Jaedon Shin <jaedon.shin@gmail.com>
Date: Sat, 24 Sep 2016 06:08:19 +0900

> This reverts commit 62469c76007e ("net: ethernet: bcmgenet: use phydev
> from struct net_device")
> 
> without this patch, we call twice bcmgenet_mii_reset, and that is intended:
> - first time from bcmgenet_power_up() to make sure the PHY is initialized
>   *before* we get to initialize the UniMAC, this is critical
> - second time from bcmgenet_mii_probe(), through the normal phy_init_hw()
> 
> with this patch, we only get to call bcmgenet_mii_reset once, in
> bcmgenet_mii_probe() because the first time in bcmgenet_power_up(),
> dev->phydev is NULL, because of a prior call to phy_disconnect() in
> bcmgenet_close(), unfortunately, there has been MAC activity, so the PHY
> gets in a bad state
> 
> Signed-off-by: Jaedon Shin <jaedon.shin@gmail.com>

This is needed by the ksettings commit that happened right after this
one, so if you want this reverted you have to revert both commits.

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH net-next v2 2/2] i40e: fix setting debug parameter early
From: Alexander Duyck @ 2016-09-24  2:51 UTC (permalink / raw)
  To: Stefan Assmann; +Cc: intel-wired-lan, Netdev, David Miller
In-Reply-To: <1474637458-5255-3-git-send-email-sassmann@kpanic.de>

On Fri, Sep 23, 2016 at 6:30 AM, Stefan Assmann <sassmann@kpanic.de> wrote:
> pf->msg_enable is a bitmask, therefore assigning the value of the
> "debug" parameter is wrong. It is initialized again later in
> i40e_sw_init() so it didn't cause any problem, except that we missed
> early debug messages. Moved the initialization and assigned
> pf->hw.debug_mask the bitmask as that's what the driver actually uses
> in i40e_debug(). Otherwise the debug parameter is just a noop.
>
> Fixes: 5b5faa4 ("i40e: enable debug earlier")
>
> Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
> ---
>  drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +++++++---------
>  1 file changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index 56369761..f972f0d 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -8498,11 +8498,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
>         int err = 0;
>         int size;
>
> -       pf->msg_enable = netif_msg_init(debug,
> -                                       NETIF_MSG_DRV    |
> -                                       NETIF_MSG_PROBE  |
> -                                       NETIF_MSG_LINK);
> -
>         /* Set default capability flags */
>         pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
>                     I40E_FLAG_MSI_ENABLED     |
> @@ -10812,10 +10807,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>         mutex_init(&hw->aq.asq_mutex);
>         mutex_init(&hw->aq.arq_mutex);
>
> -       if (debug != -1) {
> -               pf->msg_enable = pf->hw.debug_mask;
> -               pf->msg_enable = debug;
> -       }
> +       /* enable debug prints if requested */
> +       pf->msg_enable = netif_msg_init(debug,
> +                                       NETIF_MSG_DRV   |
> +                                       NETIF_MSG_PROBE |
> +                                       NETIF_MSG_LINK);
> +       if (debug != -1)
> +               pf->hw.debug_mask = pf->msg_enable;
>
>         /* do a special CORER for clearing PXE mode once at init */
>         if (hw->revision_id == 0 &&

The patch is broken, mainly because the code was already broken.  The
flags in pf->hw.debug_mask are in no way related to pf->msg_enable.
For now just use the default mask provided to populate
pf->hw.debug_mask and then the msg_enable portion is fine.

- Alex

^ permalink raw reply

* RE: Alignment issues with freescale FEC driver
From: Andy Duan @ 2016-09-24  5:13 UTC (permalink / raw)
  To: David Miller, eric@nelint.com
  Cc: andrew@lunn.ch, edumazet@google.com, otavio@ossystems.com.br,
	netdev@vger.kernel.org, troy.kisky@boundarydevices.com,
	rmk+kernel@arm.linux.org.uk, cjb.sw.nospam@gmail.com,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <20160923.224553.1824171056324385383.davem@davemloft.net>

From: David Miller <davem@davemloft.net> Sent: Saturday, September 24, 2016 10:46 AM
> To: eric@nelint.com
> Cc: andrew@lunn.ch; edumazet@google.com; Andy Duan
> <fugang.duan@nxp.com>; otavio@ossystems.com.br;
> netdev@vger.kernel.org; troy.kisky@boundarydevices.com;
> rmk+kernel@arm.linux.org.uk; cjb.sw.nospam@gmail.com; linux-arm-
> kernel@lists.infradead.org
> Subject: Re: Alignment issues with freescale FEC driver
> 
> From: Eric Nelson <eric@nelint.com>
> Date: Fri, 23 Sep 2016 11:35:17 -0700
> 
> > From the i.MX6DQ reference manual, bit 7 of ENET_RACC says this:
> >
> > "RX FIFO Shift-16
> >
> > When this field is set, the actual frame data starts at bit 16 of the
> > first word read from the RX FIFO aligning the Ethernet payload on a
> > 32-bit boundary."
> >
> > Same for the i.MX6UL.
> >
> > I'm not sure what it will take to use this, but it seems to be exactly
> > what we're looking for.
> 
> +1

RACC[SHIFT16] just instructs the MAC to write two additional bytes in front of each frame received into the RX FIFO to align
the Ethernet payload on a 32-bit boundary.
Eric's patch "net: fec: support RRACC_SHIFT16 to align IP header" works fine.

The alignment issues were introduced by commits 1b7bde6d6 and c259c132a in the net-next tree. Before these commits, there was no alignment issue.

How to fix the issue:
Solution1:  to enable HW RRACC_SHIFT16 feature (test pass):
	Eric's patch  "net: fec: support RRACC_SHIFT16 to align IP header".
Solution2: include the correct prefetch() header (test pass):
	--- a/drivers/net/ethernet/freescale/fec_main.c
	+++ b/drivers/net/ethernet/freescale/fec_main.c
	@@ -59,7 +59,7 @@
	 #include <linux/pinctrl/consumer.h>
 	#include <linux/pm_runtime.h>
 	#include <linux/busfreq-imx.h>
	-#include <linux/prefetch.h>
	+#include <asm/processor.h>
Solution3: use __netdev_alloc_skb_ip_align() instead of netdev_alloc_skb(). 
	     Or: still use the previous method before commit 1b7bde6d6:
		skb = netdev_alloc_skb(ndev, pkt_len - 4 + NET_IP_ALIGN);
		skb_reserve(skb, NET_IP_ALIGN);

Comparing these solutions:
	In terms of software effort and performance, I think these are similar.  Enabling RRACC_SHIFT16 doesn't bring any extra advantage.

Correct me if I am wrong. Thanks.

Regards,
Andy

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox