Netdev List
 help / color / mirror / Atom feed
* Re: [patch net-next 04/12] ipmr: Send FIB notifications on MFC and VIF entries
From: Nikolay Aleksandrov @ 2017-09-21 11:48 UTC (permalink / raw)
  To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw
In-Reply-To: <20170921064338.1282-5-jiri@resnulli.us>

On 21/09/17 09:43, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
> 
> Use the newly introduced notification chain to send events upon VIF and MFC
> addition and deletion. The MFC notifications are sent only on resolved MFC
> entries, as unresolved cannot be offloaded.
> 
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
>  net/ipv4/ipmr.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 53 insertions(+)
> 

LGTM, I only wish we could consolidate all of these call_ipmr_mfc_entry_notifiers()
calls inside mroute_netlink_event() but it will need an additional argument for the
ADD vs REPLACE cases. Anyway,

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 9d331a74..7891d95 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -627,6 +627,27 @@ static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
>  	return call_fib_notifier(nb, net, event_type, &info.info);
>  }
>  
> +static int call_ipmr_vif_entry_notifiers(struct net *net,
> +					 enum fib_event_type event_type,
> +					 struct vif_device *vif,
> +					 vifi_t vif_index, u32 tb_id)
> +{
> +	struct vif_entry_notifier_info info = {
> +		.info = {
> +			.family = RTNL_FAMILY_IPMR,
> +			.net = net,
> +		},
> +		.dev = vif->dev,
> +		.vif_index = vif_index,
> +		.vif_flags = vif->flags,
> +		.tb_id = tb_id,
> +	};
> +
> +	ASSERT_RTNL();
> +	net->ipv4.ipmr_seq++;
> +	return call_fib_notifiers(net, event_type, &info.info);
> +}
> +
>  static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
>  					struct net *net,
>  					enum fib_event_type event_type,
> @@ -644,6 +665,24 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
>  	return call_fib_notifier(nb, net, event_type, &info.info);
>  }
>  
> +static int call_ipmr_mfc_entry_notifiers(struct net *net,
> +					 enum fib_event_type event_type,
> +					 struct mfc_cache *mfc, u32 tb_id)
> +{
> +	struct mfc_entry_notifier_info info = {
> +		.info = {
> +			.family = RTNL_FAMILY_IPMR,
> +			.net = net,
> +		},
> +		.mfc = mfc,
> +		.tb_id = tb_id
> +	};
> +
> +	ASSERT_RTNL();
> +	net->ipv4.ipmr_seq++;
> +	return call_fib_notifiers(net, event_type, &info.info);
> +}
> +
>  /**
>   *	vif_delete - Delete a VIF entry
>   *	@notify: Set to 1, if the caller is a notifier_call
> @@ -651,6 +690,7 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
>  static int vif_delete(struct mr_table *mrt, int vifi, int notify,
>  		      struct list_head *head)
>  {
> +	struct net *net = read_pnet(&mrt->net);
>  	struct vif_device *v;
>  	struct net_device *dev;
>  	struct in_device *in_dev;
> @@ -660,6 +700,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
>  
>  	v = &mrt->vif_table[vifi];
>  
> +	if (VIF_EXISTS(mrt, vifi))
> +		call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
> +					      mrt->id);
> +
>  	write_lock_bh(&mrt_lock);
>  	dev = v->dev;
>  	v->dev = NULL;
> @@ -909,6 +953,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
>  	if (vifi+1 > mrt->maxvif)
>  		mrt->maxvif = vifi+1;
>  	write_unlock_bh(&mrt_lock);
> +	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
>  	return 0;
>  }
>  
> @@ -1209,6 +1254,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
>  
>  static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
>  {
> +	struct net *net = read_pnet(&mrt->net);
>  	struct mfc_cache *c;
>  
>  	/* The entries are added/deleted only under RTNL */
> @@ -1220,6 +1266,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
>  		return -ENOENT;
>  	rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
>  	list_del_rcu(&c->list);
> +	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
>  	mroute_netlink_event(mrt, c, RTM_DELROUTE);
>  	ipmr_cache_put(c);
>  
> @@ -1248,6 +1295,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
>  		if (!mrtsock)
>  			c->mfc_flags |= MFC_STATIC;
>  		write_unlock_bh(&mrt_lock);
> +		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
> +					      mrt->id);
>  		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
>  		return 0;
>  	}
> @@ -1297,6 +1346,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
>  		ipmr_cache_resolve(net, mrt, uc, c);
>  		ipmr_cache_free(uc);
>  	}
> +	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
>  	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
>  	return 0;
>  }
> @@ -1304,6 +1354,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
>  /* Close the multicast socket, and clear the vif tables etc */
>  static void mroute_clean_tables(struct mr_table *mrt, bool all)
>  {
> +	struct net *net = read_pnet(&mrt->net);
>  	struct mfc_cache *c, *tmp;
>  	LIST_HEAD(list);
>  	int i;
> @@ -1322,6 +1373,8 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
>  			continue;
>  		rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
>  		list_del_rcu(&c->list);
> +		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
> +					      mrt->id);
>  		mroute_netlink_event(mrt, c, RTM_DELROUTE);
>  		ipmr_cache_put(c);
>  	}
> 

^ permalink raw reply

* Re: [patch net-next 02/12] ipmr: Add reference count to MFC entries
From: Nikolay Aleksandrov @ 2017-09-21 11:56 UTC (permalink / raw)
  To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw
In-Reply-To: <20170921064338.1282-3-jiri@resnulli.us>

On 21/09/17 09:43, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
> 
> Next commits will introduce MFC notifications through the atomic
> fib_notification chain, thus allowing modules to be aware of MFC entries.
> 
> Due to the fact that modules may need to hold a reference to an MFC entry,
> add reference count to MFC entries to prevent them from being freed while
> these modules use them.
> 
> The reference counting is done only on resolved MFC entries currently.
> 
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
>  include/linux/mroute.h | 20 ++++++++++++++++++++
>  net/ipv4/ipmr.c        |  8 +++++---
>  2 files changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
> index d7f6333..2f88e3d 100644
> --- a/include/linux/mroute.h
> +++ b/include/linux/mroute.h
> @@ -138,6 +138,7 @@ struct mfc_cache {
>  			unsigned long wrong_if;
>  			unsigned long lastuse;
>  			unsigned char ttls[MAXVIFS];
> +			refcount_t refcount;

There's a struct comment above that has a short description for each entry,
I know it seems redundant, but could you please add one for this ?

>  		} res;
>  	} mfc_un;
>  	struct list_head list;
> @@ -148,4 +149,23 @@ struct rtmsg;
>  int ipmr_get_route(struct net *net, struct sk_buff *skb,
>  		   __be32 saddr, __be32 daddr,
>  		   struct rtmsg *rtm, u32 portid);
> +
> +#ifdef CONFIG_IP_MROUTE
> +void ipmr_cache_free(struct mfc_cache *mfc_cache);
> +#else
> +static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
> +{
> +}
> +#endif
> +
> +static inline void ipmr_cache_put(struct mfc_cache *c)
> +{
> +	if (refcount_dec_and_test(&c->mfc_un.res.refcount))
> +		ipmr_cache_free(c);
> +}
> +static inline void ipmr_cache_hold(struct mfc_cache *c)
> +{
> +	refcount_inc(&c->mfc_un.res.refcount);
> +}
> +
>  #endif
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index c9b3e6e..86dc5f9 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -652,10 +652,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head)
>  	kmem_cache_free(mrt_cachep, c);
>  }
>  
> -static inline void ipmr_cache_free(struct mfc_cache *c)
> +void ipmr_cache_free(struct mfc_cache *c)
>  {
>  	call_rcu(&c->rcu, ipmr_cache_free_rcu);
>  }
> +EXPORT_SYMBOL(ipmr_cache_free);
>  
>  /* Destroy an unresolved cache entry, killing queued skbs
>   * and reporting error to netlink readers.
> @@ -949,6 +950,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
>  	if (c) {
>  		c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
>  		c->mfc_un.res.minvif = MAXVIFS;
> +		refcount_set(&c->mfc_un.res.refcount, 1);
>  	}
>  	return c;
>  }
> @@ -1162,7 +1164,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
>  	rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
>  	list_del_rcu(&c->list);
>  	mroute_netlink_event(mrt, c, RTM_DELROUTE);
> -	ipmr_cache_free(c);
> +	ipmr_cache_put(c);
>  
>  	return 0;
>  }
> @@ -1264,7 +1266,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
>  		rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
>  		list_del_rcu(&c->list);
>  		mroute_netlink_event(mrt, c, RTM_DELROUTE);
> -		ipmr_cache_free(c);
> +		ipmr_cache_put(c);
>  	}
>  
>  	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
> 

^ permalink raw reply

* Re: [PATCH net-next 2/3] net: ethernet: socionext: add AVE ethernet driver
From: Kunihiko Hayashi @ 2017-09-21 12:27 UTC (permalink / raw)
  To: Florian Fainelli, netdev
  Cc: David S. Miller, Andrew Lunn, Rob Herring, Mark Rutland,
	linux-arm-kernel, linux-kernel, devicetree, Masahiro Yamada,
	Masami Hiramatsu, Jassi Brar
In-Reply-To: <20170911155555.6719.4A936039@socionext.com>

On Mon, 11 Sep 2017 15:55:56 +0900 <hayashi.kunihiko@socionext.com> wrote:

> > > +static int ave_set_rxdesc(struct net_device *ndev, int entry)
> > > +{
> > > +	struct ave_private *priv = netdev_priv(ndev);
> > > +	struct sk_buff *skb;
> > > +	unsigned long align;
> > > +	dma_addr_t paddr;
> > > +	void *buffptr;
> > > +	int ret = 0;
> > > +
> > > +	skb = priv->rx.desc[entry].skbs;
> > > +	if (!skb) {
> > > +		skb = netdev_alloc_skb_ip_align(ndev,
> > > +						AVE_MAX_ETHFRAME + NET_SKB_PAD);
> > > +		if (!skb) {
> > > +			netdev_err(ndev, "can't allocate skb for Rx\n");
> > > +			return -ENOMEM;
> > > +		}
> > > +	}
> > > +
> > > +	/* set disable to cmdsts */
> > > +	ave_wdesc(ndev, AVE_DESCID_RX, entry, 0, AVE_STS_INTR | AVE_STS_OWN);
> > > +
> > > +	/* align skb data for cache size */
> > > +	align = (unsigned long)skb_tail_pointer(skb) & (NET_SKB_PAD - 1);
> > > +	align = NET_SKB_PAD - align;
> > > +	skb_reserve(skb, align);
> > > +	buffptr = (void *)skb_tail_pointer(skb);
> > 
> > Are you positive you need this? Because by default, the networking stack
> > will align to the maximum between your L1 cache line size and 64 bytes,
> > which should be a pretty good alignment guarantee.
> 
> Now if L1 cache line size is 128,
> the skb buffer is also aligned to 128, isn't it?
> So this code doesn't make sense.

Although the above cache-alignment operation isn't necessary,
we should add the address adjustment because of the restriction of the hardware
specification.

The netdev_alloc_skb_ip_align() allocates the cache-aligned buffer
and add 2 byte to skb->data by skb_reserve(skb, NET_IP_ALIGN).
Then skb->data points to "aligned address + 2 byte".

When we call dma_map_single() with skb->data, it might return the aligned address
and there might not be 2 byte space.

On the other hand, according to the hardware specification,
the Rx buffer address set to the descriptor is assumed that:
 - the Rx address is 4 byte aligned,
 - the Rx address begins with 2 byte headroom, data will be put from (buffer+2).

Therefore, to make headroom in front of returned address from ave_dma_map(),
I think that the buffer address should be adjusted like that:

    skb = netdev_alloc_skb_ip_align(ndev, AVE_MAX_ETHFRAME);

    paddr = ave_dma_map(ndev, &priv->rx.desc[entry],
		skb->data - NET_IP_ALIGN,
		AVE_MAX_ETHFRAME + NET_IP_ALIGN, DMA_FROM_DEVICE);

    ave_wdesc_addr(ndev, AVE_DESCID_RX, entry, 4, paddr);

I'll apply the code to next patch.

BTW, since the Tx buffer address doesn't have any restrictions, the adjustment
like this isn't necessary.


> > > +
> > > +	/* enable clock */
> > > +	priv->clk = devm_clk_get(dev, NULL);
> > > +	if (IS_ERR(priv->clk))
> > > +		priv->clk = NULL;
> > > +	clk_prepare_enable(priv->clk);
> > 
> > Same here with the clock, the block is clocked, so it can consume some
> > amount of power, just do the necessary HW initialization with the clock
> > enabled, then defer until ndo_open() before turning it back on.

There are a number of the functions that needs clock enabled and "block reset"
operations, like mdiobus_register(), phy_connect(), and so on.

I tried to move such functions to ndo_open() to defer clock enabled until ndo_open().
However, the driver didn't work for some reasons of hardware restriction.
I think it's hard to change this sequence.

---
Best Regards,
Kunihiko Hayashi

^ permalink raw reply

* [PATCH 1/1] net: wan : hdlc: use setup_timer() helper
From: Allen Pais @ 2017-09-21 12:47 UTC (permalink / raw)
  To: linux-kernel; +Cc: khc, netdev, Allen Pais

    Use setup_timer function instead of initializing timer with the
    function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
 drivers/net/wan/hdlc_fr.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 78596e4..425a47f 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1003,11 +1003,10 @@ static void fr_start(struct net_device *dev)
 		state(hdlc)->n391cnt = 0;
 		state(hdlc)->txseq = state(hdlc)->rxseq = 0;
 
-		init_timer(&state(hdlc)->timer);
+		setup_timer(&state(hdlc)->timer, fr_timer,
+			    (unsigned long)dev);
 		/* First poll after 1 s */
 		state(hdlc)->timer.expires = jiffies + HZ;
-		state(hdlc)->timer.function = fr_timer;
-		state(hdlc)->timer.data = (unsigned long)dev;
 		add_timer(&state(hdlc)->timer);
 	} else
 		fr_set_link_state(1, dev);
-- 
2.7.4

^ permalink raw reply related

* [PATCH 1/1] net: usb: catc: use setup_timer() helper
From: Allen Pais @ 2017-09-21 12:54 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-usb, netdev, Allen Pais

    Use setup_timer function instead of initializing timer with the
    function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
 drivers/net/usb/catc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index dbc9031..aeb62e1 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -805,9 +805,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 	spin_lock_init(&catc->tx_lock);
 	spin_lock_init(&catc->ctrl_lock);
 
-	init_timer(&catc->timer);
-	catc->timer.data = (long) catc;
-	catc->timer.function = catc_stats_timer;
+	setup_timer(&catc->timer, catc_stats_timer, (long)catc);
 
 	catc->ctrl_urb = usb_alloc_urb(0, GFP_KERNEL);
 	catc->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-- 
2.7.4

^ permalink raw reply related

* [PATCH 1/1] net: ti: netcp: use setup_timer
From: Allen Pais @ 2017-09-21 13:02 UTC (permalink / raw)
  To: linux-kernel; +Cc: w-kwok2, m-karicheri2, netdev, Allen Pais

    Use setup_timer function instead of initializing timer with the
    function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
 drivers/net/ethernet/ti/netcp_ethss.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 28cb38a..4ad8216 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -3616,9 +3616,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
 	}
 	spin_unlock_bh(&gbe_dev->hw_stats_lock);
 
-	init_timer(&gbe_dev->timer);
-	gbe_dev->timer.data	 = (unsigned long)gbe_dev;
-	gbe_dev->timer.function = netcp_ethss_timer;
+	setup_timer(&gbe_dev->timer, netcp_ethss_timer,
+		    (unsigned long)gbe_dev);
 	gbe_dev->timer.expires	 = jiffies + GBE_TIMER_INTERVAL;
 	add_timer(&gbe_dev->timer);
 	*inst_priv = gbe_dev;
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH net-next] bpf: Optimize lpm trie delete
From: Craig Gallek @ 2017-09-21 13:13 UTC (permalink / raw)
  To: Daniel Mack; +Cc: Alexei Starovoitov, Daniel Borkmann, David S . Miller, netdev
In-Reply-To: <dc09f044-be69-a2a9-bf77-331ce558eee9@zonque.org>

On Wed, Sep 20, 2017 at 6:56 PM, Daniel Mack <daniel@zonque.org> wrote:
> On 09/20/2017 08:51 PM, Craig Gallek wrote:
>> On Wed, Sep 20, 2017 at 12:51 PM, Daniel Mack <daniel@zonque.org> wrote:
>>> Hi Craig,
>>>
>>> Thanks, this looks much cleaner already :)
>>>
>>> On 09/20/2017 06:22 PM, Craig Gallek wrote:
>>>> diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
>>>> index 9d58a576b2ae..b5a7d70ec8b5 100644
>>>> --- a/kernel/bpf/lpm_trie.c
>>>> +++ b/kernel/bpf/lpm_trie.c
>>>> @@ -397,7 +397,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
>>>>       struct lpm_trie_node __rcu **trim;
>>>>       struct lpm_trie_node *node;
>>>>       unsigned long irq_flags;
>>>> -     unsigned int next_bit;
>>>> +     unsigned int next_bit = 0;
>>>
>>> This default assignment seems wrong, and I guess you only added it to
>>> squelch a compiler warning?
>> Yes, this variable is only initialized after the lookup iterations
>> below (meaning it will never be initialized the the root-removal
>> case).
>
> Right, and once set, it's only updated in case we don't have an exact
> match and try to drill down further.
>
>>> [...]
>>>
>>>> +     /* If the node has one child, we may be able to collapse the tree
>>>> +      * while removing this node if the node's child is in the same
>>>> +      * 'next bit' slot as this node was in its parent or if the node
>>>> +      * itself is the root.
>>>> +      */
>>>> +     if (trim == &trie->root) {
>>>> +             next_bit = node->child[0] ? 0 : 1;
>>>> +             rcu_assign_pointer(trie->root, node->child[next_bit]);
>>>> +             kfree_rcu(node, rcu);
>>>
>>> I don't think you should treat this 'root' case special.
>>>
>>> Instead, move the 'next_bit' assignment outside of the condition ...
>> I'm not quite sure I follow.  Are you saying do something like this:
>>
>> if (trim == &trie->root) {
>>   next_bit = node->child[0] ? 0 : 1;
>> }
>> if (rcu_access_pointer(node->child[next_bit])) {
>> ...
>>
>> This would save a couple lines of code, but I think the as-is
>> implementation is slightly easier to understand.  I don't have a
>> strong opinion either way, though.
>
> Me neither :)
>
> My idea was to set
>
>   next_bit = node->child[0] ? 0 : 1;
>
> unconditionally, because it should result in the same in both cases.
>
> It might be a bit of bike shedding, but I dislike this default
> assignment, and I believe that not relying on next_bit to be set as a
> side effect of the lookup loop makes the code a bit more readable.
>
> WDYT?
That sounds reasonable.  I'll spin a v2 today if no one else has any comments.

Thanks again,
Craig

^ permalink raw reply

* Re: Latest net-next from GIT panic
From: Paweł Staszewski @ 2017-09-21 13:18 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Wei Wang, Cong Wang, Linux Kernel Network Developers,
	Eric Dumazet
In-Reply-To: <8d2b4263-bd81-9cff-81ac-2df686cf29aa@itcare.pl>



W dniu 2017-09-21 o 13:31, Paweł Staszewski pisze:
>
>
> W dniu 2017-09-21 o 13:03, Eric Dumazet pisze:
>> OK we have two problems here
>>
>> 1) We need to unify skb_dst_force()  ( for net tree )
>>
>> 2) Vlan devices should try to correctly handle IFF_XMIT_DST_RELEASE from
>> lower device. This will considerably help your performance.
>>
>>
>> For 1), this is what I had in mind, can you try it ?
>>
>> Thanks a lot !
>>
>> diff --git a/include/net/dst.h b/include/net/dst.h
>> index 
>> 93568bd0a3520bb7402f04d90cf04ac99c81cfbe..f23851eeaad917e8dafc06b58d23a2575405c894 
>> 100644
>> --- a/include/net/dst.h
>> +++ b/include/net/dst.h
>> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry 
>> *dst, unsigned long time)
>>   static inline struct dst_entry *dst_clone(struct dst_entry *dst)
>>   {
>>       if (dst)
>> -        atomic_inc(&dst->__refcnt);
>> +        dst_hold(dst);
>>       return dst;
>>   }
>>   @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff 
>> *nskb, const struct sk_buff *oskb
>>       __skb_dst_copy(nskb, oskb->_skb_refdst);
>>   }
>>   -/**
>> - * skb_dst_force - makes sure skb dst is refcounted
>> - * @skb: buffer
>> - *
>> - * If dst is not yet refcounted, let's do it
>> - */
>> -static inline void skb_dst_force(struct sk_buff *skb)
>> -{
>> -    if (skb_dst_is_noref(skb)) {
>> -        WARN_ON(!rcu_read_lock_held());
>> -        skb->_skb_refdst &= ~SKB_DST_NOREF;
>> -        dst_clone(skb_dst(skb));
>> -    }
>> -}
>> -
>>   /**
>>    * dst_hold_safe - Take a reference on a dst if possible
>>    * @dst: pointer to dst entry
>> @@ -356,6 +341,23 @@ static inline void skb_dst_force_safe(struct 
>> sk_buff *skb)
>>       }
>>   }
>>   +/**
>> + * skb_dst_force - makes sure skb dst is refcounted
>> + * @skb: buffer
>> + *
>> + * If dst is not yet refcounted, let's do it
>> + */
>> +static inline void skb_dst_force(struct sk_buff *skb)
>> +{
>> +    if (skb_dst_is_noref(skb)) {
>> +        struct dst_entry *dst = skb_dst(skb);
>> +
>> +        WARN_ON(!rcu_read_lock_held());
>> +        if (!dst_hold_safe(dst))
>> +            dst = NULL;
>> +        skb->_skb_refdst = (unsigned long)dst;
>> +    }
>> +}
>>     /**
>>    *    __skb_tunnel_rx - prepare skb for rx reinsert
>>
>>
>
> Patch applied - soo far no problems - and no warnings in dmesg
>
>
ok after adding patch all is working from now for about 1 hour of normal 
traffic witc all bgp sessions connected and about 600k prefixes in kernel.

^ permalink raw reply

* [net-next 0/2] add device MTU validation check
From: Zhang Shengju @ 2017-09-21 13:32 UTC (permalink / raw)
  To: davem, willemb, stephen, netdev

This patch serial add device MTU validation check, so that only valid mtu value 
can be set when adding new device.

Zhang Shengju (2):
  dummy: add device MTU validation check
  ifb: add device MTU validation check

 drivers/net/dummy.c | 8 ++++++++
 drivers/net/ifb.c   | 8 ++++++++
 2 files changed, 16 insertions(+)

-- 
1.8.3.1

^ permalink raw reply

* [net-next 2/2] ifb: add device MTU validation check
From: Zhang Shengju @ 2017-09-21 13:32 UTC (permalink / raw)
  To: davem, willemb, stephen, netdev
In-Reply-To: <1506000722-40095-1-git-send-email-zhangshengju@cmss.chinamobile.com>

Currently, any mtu value can be assigned when adding a new ifb device:
[~]# ip link add name ifb2 mtu 100000 type ifb
[~]# ip link show ifb2
18: ifb2: <BROADCAST,NOARP> mtu 100000 qdisc noop state DOWN mode DEFAULT group default qlen 32
    link/ether 7a:bf:f4:63:da:d1 brd ff:ff:ff:ff:ff:ff

This patch adds device MTU validation check.

Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
---
 drivers/net/ifb.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 8870bd2..ce84ad2 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -282,6 +282,14 @@ static int ifb_validate(struct nlattr *tb[], struct nlattr *data[],
 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
 			return -EADDRNOTAVAIL;
 	}
+
+	if (tb[IFLA_MTU]) {
+		u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+
+		if (mtu < ETH_MIN_MTU || mtu > ETH_DATA_LEN)
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
-- 
1.8.3.1

^ permalink raw reply related

* [net-next 1/2] dummy: add device MTU validation check
From: Zhang Shengju @ 2017-09-21 13:32 UTC (permalink / raw)
  To: davem, willemb, stephen, netdev
In-Reply-To: <1506000722-40095-1-git-send-email-zhangshengju@cmss.chinamobile.com>

Currently, any mtu value can be assigned when adding a new dummy device:
[~]# ip link add name dummy1 mtu 100000 type dummy
[~]# ip link show dummy1
15: dummy1: <BROADCAST,NOARP> mtu 100000 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 0a:61:6b:16:14:ce brd ff:ff:ff:ff:ff:ff

This patch adds device MTU validation check.

Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
---
 drivers/net/dummy.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index e31ab3b..0276b2b 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -365,6 +365,14 @@ static int dummy_validate(struct nlattr *tb[], struct nlattr *data[],
 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
 			return -EADDRNOTAVAIL;
 	}
+
+	if (tb[IFLA_MTU]) {
+		u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+
+		if (mtu > ETH_MAX_MTU)
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next] net: mvpp2: phylink support
From: Antoine Tenart @ 2017-09-21 13:45 UTC (permalink / raw)
  To: davem, linux
  Cc: Antoine Tenart, andrew, gregory.clement, thomas.petazzoni,
	miquel.raynal, nadavh, linux-kernel, mw, stefanc, netdev

Convert the PPv2 driver to use phylink, which models the MAC to PHY
link. The phylink support is made such a way the GoP link IRQ can still
be used: the two modes are incompatible and the GoP link IRQ will be
used if no PHY is described in the device tree. This is the same
behaviour as before.

Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
 drivers/net/ethernet/marvell/Kconfig |   1 +
 drivers/net/ethernet/marvell/mvpp2.c | 502 +++++++++++++++++++++--------------
 2 files changed, 303 insertions(+), 200 deletions(-)

diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index da6fb825afea..991138b8dfba 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -86,6 +86,7 @@ config MVPP2
 	depends on ARCH_MVEBU || COMPILE_TEST
 	depends on HAS_DMA
 	select MVMDIO
+	select PHYLINK
 	---help---
 	  This driver supports the network interface units in the
 	  Marvell ARMADA 375, 7K and 8K SoCs.
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 8041d692db3c..5fb7e76ee128 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -28,6 +28,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/phy.h>
+#include <linux/phylink.h>
 #include <linux/phy/phy.h>
 #include <linux/clk.h>
 #include <linux/hrtimer.h>
@@ -341,6 +342,7 @@
 #define     MVPP2_GMAC_FORCE_LINK_PASS		BIT(1)
 #define     MVPP2_GMAC_IN_BAND_AUTONEG		BIT(2)
 #define     MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS	BIT(3)
+#define     MVPP2_GMAC_IN_BAND_RESTART_AN	BIT(4)
 #define     MVPP2_GMAC_CONFIG_MII_SPEED	BIT(5)
 #define     MVPP2_GMAC_CONFIG_GMII_SPEED	BIT(6)
 #define     MVPP2_GMAC_AN_SPEED_EN		BIT(7)
@@ -350,6 +352,12 @@
 #define     MVPP2_GMAC_AN_DUPLEX_EN		BIT(13)
 #define MVPP2_GMAC_STATUS0			0x10
 #define     MVPP2_GMAC_STATUS0_LINK_UP		BIT(0)
+#define	    MVPP2_GMAC_STATUS0_GMII_SPEED	BIT(1)
+#define	    MVPP2_GMAC_STATUS0_MII_SPEED	BIT(2)
+#define	    MVPP2_GMAC_STATUS0_FULL_DUPLEX	BIT(3)
+#define     MVPP2_GMAC_STATUS0_RX_PAUSE		BIT(6)
+#define     MVPP2_GMAC_STATUS0_TX_PAUSE		BIT(7)
+#define     MVPP2_GMAC_STATUS0_AN_COMPLETE	BIT(11)
 #define MVPP2_GMAC_PORT_FIFO_CFG_1_REG		0x1c
 #define     MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS	6
 #define     MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK	0x1fc0
@@ -878,12 +886,11 @@ struct mvpp2_port {
 	u16 rx_ring_size;
 	struct mvpp2_pcpu_stats __percpu *stats;
 
+	struct device_node *of_node;
+
 	phy_interface_t phy_interface;
-	struct device_node *phy_node;
+	struct phylink *phylink;
 	struct phy *comphy;
-	unsigned int link;
-	unsigned int duplex;
-	unsigned int speed;
 
 	struct mvpp2_bm_pool *pool_long;
 	struct mvpp2_bm_pool *pool_short;
@@ -4716,13 +4723,14 @@ static void mvpp2_port_periodic_xon_disable(struct mvpp2_port *port)
 }
 
 /* Configure loopback port */
-static void mvpp2_port_loopback_set(struct mvpp2_port *port)
+static void mvpp2_port_loopback_set(struct mvpp2_port *port,
+				    const struct phylink_link_state *state)
 {
 	u32 val;
 
 	val = readl(port->base + MVPP2_GMAC_CTRL_1_REG);
 
-	if (port->speed == 1000)
+	if (state->speed == 1000)
 		val |= MVPP2_GMAC_GMII_LB_EN_MASK;
 	else
 		val &= ~MVPP2_GMAC_GMII_LB_EN_MASK;
@@ -4778,10 +4786,6 @@ static void mvpp2_defaults_set(struct mvpp2_port *port)
 	int tx_port_num, val, queue, ptxq, lrxq;
 
 	if (port->priv->hw_version == MVPP21) {
-		/* Configure port to loopback if needed */
-		if (port->flags & MVPP2_F_LOOPBACK)
-			mvpp2_port_loopback_set(port);
-
 		/* Update TX FIFO MIN Threshold */
 		val = readl(port->base + MVPP2_GMAC_PORT_FIFO_CFG_1_REG);
 		val &= ~MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK;
@@ -5860,111 +5864,6 @@ static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static void mvpp2_gmac_set_autoneg(struct mvpp2_port *port,
-				   struct phy_device *phydev)
-{
-	u32 val;
-
-	if (port->phy_interface != PHY_INTERFACE_MODE_RGMII &&
-	    port->phy_interface != PHY_INTERFACE_MODE_RGMII_ID &&
-	    port->phy_interface != PHY_INTERFACE_MODE_RGMII_RXID &&
-	    port->phy_interface != PHY_INTERFACE_MODE_RGMII_TXID &&
-	    port->phy_interface != PHY_INTERFACE_MODE_SGMII)
-		return;
-
-	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-	val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
-		 MVPP2_GMAC_CONFIG_GMII_SPEED |
-		 MVPP2_GMAC_CONFIG_FULL_DUPLEX |
-		 MVPP2_GMAC_AN_SPEED_EN |
-		 MVPP2_GMAC_AN_DUPLEX_EN);
-
-	if (phydev->duplex)
-		val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
-
-	if (phydev->speed == SPEED_1000)
-		val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
-	else if (phydev->speed == SPEED_100)
-		val |= MVPP2_GMAC_CONFIG_MII_SPEED;
-
-	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-}
-
-/* Adjust link */
-static void mvpp2_link_event(struct net_device *dev)
-{
-	struct mvpp2_port *port = netdev_priv(dev);
-	struct phy_device *phydev = dev->phydev;
-	bool link_reconfigured = false;
-	u32 val;
-
-	if (phydev->link) {
-		if (port->phy_interface != phydev->interface && port->comphy) {
-	                /* disable current port for reconfiguration */
-	                mvpp2_interrupts_disable(port);
-	                netif_carrier_off(port->dev);
-	                mvpp2_port_disable(port);
-			phy_power_off(port->comphy);
-
-	                /* comphy reconfiguration */
-	                port->phy_interface = phydev->interface;
-	                mvpp22_comphy_init(port);
-
-	                /* gop/mac reconfiguration */
-	                mvpp22_gop_init(port);
-	                mvpp2_port_mii_set(port);
-
-	                link_reconfigured = true;
-		}
-
-		if ((port->speed != phydev->speed) ||
-		    (port->duplex != phydev->duplex)) {
-			mvpp2_gmac_set_autoneg(port, phydev);
-
-			port->duplex = phydev->duplex;
-			port->speed  = phydev->speed;
-		}
-	}
-
-	if (phydev->link != port->link || link_reconfigured) {
-		port->link = phydev->link;
-
-		if (phydev->link) {
-			if (port->phy_interface == PHY_INTERFACE_MODE_RGMII ||
-			    port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
-			    port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID ||
-			    port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID ||
-			    port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
-				val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-				val |= (MVPP2_GMAC_FORCE_LINK_PASS |
-					MVPP2_GMAC_FORCE_LINK_DOWN);
-				writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
-			}
-
-			mvpp2_interrupts_enable(port);
-			mvpp2_port_enable(port);
-
-			mvpp2_egress_enable(port);
-			mvpp2_ingress_enable(port);
-			netif_carrier_on(dev);
-			netif_tx_wake_all_queues(dev);
-		} else {
-			port->duplex = -1;
-			port->speed = 0;
-
-			netif_tx_stop_all_queues(dev);
-			netif_carrier_off(dev);
-			mvpp2_ingress_disable(port);
-			mvpp2_egress_disable(port);
-
-			mvpp2_port_disable(port);
-			mvpp2_interrupts_disable(port);
-		}
-
-		phy_print_status(phydev);
-	}
-}
-
 static void mvpp2_timer_set(struct mvpp2_port_pcpu *port_pcpu)
 {
 	ktime_t interval;
@@ -6582,7 +6481,6 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 /* Set hw internals when starting port */
 static void mvpp2_start_dev(struct mvpp2_port *port)
 {
-	struct net_device *ndev = port->dev;
 	int i;
 
 	if (port->gop_id == 0 &&
@@ -6607,15 +6505,14 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
 
 	mvpp2_port_mii_set(port);
 	mvpp2_port_enable(port);
-	if (ndev->phydev)
-		phy_start(ndev->phydev);
+	if (port->phylink)
+		phylink_start(port->phylink);
 	netif_tx_start_all_queues(port->dev);
 }
 
 /* Set hw internals when stopping port */
 static void mvpp2_stop_dev(struct mvpp2_port *port)
 {
-	struct net_device *ndev = port->dev;
 	int i;
 
 	/* Stop new packets from arriving to RXQs */
@@ -6634,8 +6531,8 @@ static void mvpp2_stop_dev(struct mvpp2_port *port)
 
 	mvpp2_egress_disable(port);
 	mvpp2_port_disable(port);
-	if (ndev->phydev)
-		phy_stop(ndev->phydev);
+	if (port->phylink)
+		phylink_stop(port->phylink);
 	phy_power_off(port->comphy);
 }
 
@@ -6688,40 +6585,6 @@ static void mvpp21_get_mac_address(struct mvpp2_port *port, unsigned char *addr)
 	addr[5] = (mac_addr_l >> MVPP2_GMAC_SA_LOW_OFFS) & 0xFF;
 }
 
-static int mvpp2_phy_connect(struct mvpp2_port *port)
-{
-	struct phy_device *phy_dev;
-
-	/* No PHY is attached */
-	if (!port->phy_node)
-		return 0;
-
-	phy_dev = of_phy_connect(port->dev, port->phy_node, mvpp2_link_event, 0,
-				 port->phy_interface);
-	if (!phy_dev) {
-		netdev_err(port->dev, "cannot connect to phy\n");
-		return -ENODEV;
-	}
-	phy_dev->supported &= PHY_GBIT_FEATURES;
-	phy_dev->advertising = phy_dev->supported;
-
-	port->link    = 0;
-	port->duplex  = 0;
-	port->speed   = 0;
-
-	return 0;
-}
-
-static void mvpp2_phy_disconnect(struct mvpp2_port *port)
-{
-	struct net_device *ndev = port->dev;
-
-	if (!ndev->phydev)
-		return;
-
-	phy_disconnect(ndev->phydev);
-}
-
 static int mvpp2_irqs_init(struct mvpp2_port *port)
 {
 	int err, i;
@@ -6765,7 +6628,6 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port)
 static int mvpp2_open(struct net_device *dev)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
-	struct mvpp2 *priv = port->priv;
 	unsigned char mac_bcast[ETH_ALEN] = {
 			0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	int err;
@@ -6811,7 +6673,16 @@ static int mvpp2_open(struct net_device *dev)
 		goto err_cleanup_txqs;
 	}
 
-	if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq) {
+	/* In default link is down */
+	netif_carrier_off(port->dev);
+
+	if (port->phylink) {
+		err = phylink_of_phy_connect(port->phylink, port->of_node);
+		if (err) {
+			netdev_err(port->dev, "could not attach PHY (%d)\n", err);
+			goto err_free_irq;
+		}
+	} else if (port->link_irq) {
 		err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
 				  dev->name, port);
 		if (err) {
@@ -6821,15 +6692,11 @@ static int mvpp2_open(struct net_device *dev)
 		}
 
 		mvpp22_gop_setup_irq(port);
+	} else {
+		netdev_err(dev, "cannot use phylink or GoP link IRQ\n");
+		goto err_free_irq;
 	}
 
-	/* In default link is down */
-	netif_carrier_off(port->dev);
-
-	err = mvpp2_phy_connect(port);
-	if (err < 0)
-		goto err_free_link_irq;
-
 	/* Unmask interrupts on all CPUs */
 	on_each_cpu(mvpp2_interrupts_unmask, port, 1);
 	mvpp2_shared_interrupt_mask_unmask(port, false);
@@ -6838,9 +6705,6 @@ static int mvpp2_open(struct net_device *dev)
 
 	return 0;
 
-err_free_link_irq:
-	if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
-		free_irq(port->link_irq, port);
 err_free_irq:
 	mvpp2_irqs_deinit(port);
 err_cleanup_txqs:
@@ -6854,17 +6718,17 @@ static int mvpp2_stop(struct net_device *dev)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
 	struct mvpp2_port_pcpu *port_pcpu;
-	struct mvpp2 *priv = port->priv;
 	int cpu;
 
 	mvpp2_stop_dev(port);
-	mvpp2_phy_disconnect(port);
+	if (port->phylink)
+		phylink_disconnect_phy(port->phylink);
 
 	/* Mask interrupts on all CPUs */
 	on_each_cpu(mvpp2_interrupts_mask, port, 1);
 	mvpp2_shared_interrupt_mask_unmask(port, true);
 
-	if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
+	if (port->link_irq)
 		free_irq(port->link_irq, port);
 
 	mvpp2_irqs_deinit(port);
@@ -7029,20 +6893,26 @@ mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
 static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	int ret;
+	struct mvpp2_port *port = netdev_priv(dev);
 
-	if (!dev->phydev)
+	if (!port->phylink)
 		return -ENOTSUPP;
 
-	ret = phy_mii_ioctl(dev->phydev, ifr, cmd);
-	if (!ret)
-		mvpp2_link_event(dev);
-
-	return ret;
+	return phylink_mii_ioctl(port->phylink, ifr, cmd);
 }
 
 /* Ethtool methods */
 
+static int mvpp2_ethtool_nway_reset(struct net_device *dev)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return -ENOTSUPP;
+
+	return phylink_ethtool_nway_reset(port->phylink);
+}
+
 /* Set interrupt coalescing for ethtools */
 static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
 				      struct ethtool_coalesce *c)
@@ -7170,6 +7040,50 @@ static int mvpp2_ethtool_set_ringparam(struct net_device *dev,
 	return err;
 }
 
+static void mvpp2_ethtool_get_pause_param(struct net_device *dev,
+					  struct ethtool_pauseparam *pause)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return;
+
+	phylink_ethtool_get_pauseparam(port->phylink, pause);
+}
+
+static int mvpp2_ethtool_set_pause_param(struct net_device *dev,
+					 struct ethtool_pauseparam *pause)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return -ENOTSUPP;
+
+	return phylink_ethtool_set_pauseparam(port->phylink, pause);
+}
+
+static int mvpp2_ethtool_get_link_ksettings(struct net_device *dev,
+					    struct ethtool_link_ksettings *cmd)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return -ENOTSUPP;
+
+	return phylink_ethtool_ksettings_get(port->phylink, cmd);
+}
+
+static int mvpp2_ethtool_set_link_ksettings(struct net_device *dev,
+					    const struct ethtool_link_ksettings *cmd)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (!port->phylink)
+		return -ENOTSUPP;
+
+	return phylink_ethtool_ksettings_set(port->phylink, cmd);
+}
+
 /* Device ops */
 
 static const struct net_device_ops mvpp2_netdev_ops = {
@@ -7184,15 +7098,17 @@ static const struct net_device_ops mvpp2_netdev_ops = {
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
-	.nway_reset	= phy_ethtool_nway_reset,
+	.nway_reset	= mvpp2_ethtool_nway_reset,
 	.get_link	= ethtool_op_get_link,
 	.set_coalesce	= mvpp2_ethtool_set_coalesce,
 	.get_coalesce	= mvpp2_ethtool_get_coalesce,
 	.get_drvinfo	= mvpp2_ethtool_get_drvinfo,
 	.get_ringparam	= mvpp2_ethtool_get_ringparam,
 	.set_ringparam	= mvpp2_ethtool_set_ringparam,
-	.get_link_ksettings = phy_ethtool_get_link_ksettings,
-	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+	.get_pauseparam	= mvpp2_ethtool_get_pause_param,
+	.set_pauseparam	= mvpp2_ethtool_set_pause_param,
+	.get_link_ksettings = mvpp2_ethtool_get_link_ksettings,
+	.set_link_ksettings = mvpp2_ethtool_set_link_ksettings,
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -7492,17 +7408,185 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
 	eth_hw_addr_random(dev);
 }
 
+static void mvpp2_phylink_validate(struct net_device *dev,
+				   unsigned long *supported,
+				   struct phylink_link_state *state)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+	phylink_set_port_modes(mask);
+
+	phylink_set(mask, Autoneg);
+	phylink_set(mask, Pause);
+	phylink_set(mask, Asym_Pause);
+
+	phylink_set(mask, 10baseT_Half);
+	phylink_set(mask, 10baseT_Full);
+	phylink_set(mask, 100baseT_Half);
+	phylink_set(mask, 100baseT_Full);
+	phylink_set(mask, 1000baseT_Half);
+	phylink_set(mask, 1000baseT_Full);
+	phylink_set(mask, 1000baseX_Full);
+	phylink_set(mask, 10000baseKR_Full);
+
+	bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static int mvpp2_phylink_mac_link_state(struct net_device *dev,
+					struct phylink_link_state *state)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	u32 val;
+
+	if (!phy_interface_mode_is_rgmii(port->phy_interface) &&
+	    port->phy_interface != PHY_INTERFACE_MODE_SGMII)
+		return 0;
+
+	val = readl(port->base + MVPP2_GMAC_STATUS0);
+
+	state->an_complete = !!(val & MVPP2_GMAC_STATUS0_AN_COMPLETE);
+	state->link = !!(val & MVPP2_GMAC_STATUS0_LINK_UP);
+	state->duplex = !!(val & MVPP2_GMAC_STATUS0_FULL_DUPLEX);
+
+	if (val & MVPP2_GMAC_STATUS0_GMII_SPEED)
+		state->speed = SPEED_1000;
+	else
+		state->speed = (val & MVPP2_GMAC_STATUS0_MII_SPEED) ?
+			       SPEED_100 : SPEED_10;
+
+	state->pause = 0;
+	if (val & MVPP2_GMAC_STATUS0_RX_PAUSE)
+		state->pause |= MLO_PAUSE_RX;
+	if (val & MVPP2_GMAC_STATUS0_TX_PAUSE)
+		state->pause |= MLO_PAUSE_TX;
+
+	return 1;
+}
+
+static void mvpp2_mac_an_restart(struct net_device *dev)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	u32 val;
+
+	if (!phy_interface_mode_is_rgmii(port->phy_interface) &&
+	    port->phy_interface != PHY_INTERFACE_MODE_SGMII)
+		return;
+
+	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	val |= MVPP2_GMAC_IN_BAND_RESTART_AN;
+	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
+
+static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+			     const struct phylink_link_state *state)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	u32 val;
+
+	/* disable current port for reconfiguration */
+	mvpp2_interrupts_disable(port);
+	netif_carrier_off(port->dev);
+	mvpp2_port_disable(port);
+	phy_power_off(port->comphy);
+
+	/* comphy reconfiguration */
+	port->phy_interface = state->interface;
+	mvpp22_comphy_init(port);
+
+	/* gop/mac reconfiguration */
+	mvpp22_gop_init(port);
+	mvpp2_port_mii_set(port);
+
+	if (!phy_interface_mode_is_rgmii(port->phy_interface) &&
+	    port->phy_interface != PHY_INTERFACE_MODE_SGMII)
+		return;
+
+	val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
+		 MVPP2_GMAC_CONFIG_GMII_SPEED |
+		 MVPP2_GMAC_CONFIG_FULL_DUPLEX |
+		 MVPP2_GMAC_AN_SPEED_EN |
+		 MVPP2_GMAC_AN_DUPLEX_EN);
+
+	if (state->duplex)
+		val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+
+	if (state->speed == SPEED_1000)
+		val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
+	else if (state->speed == SPEED_100)
+		val |= MVPP2_GMAC_CONFIG_MII_SPEED;
+
+	writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+	if (port->priv->hw_version == MVPP21 && port->flags & MVPP2_F_LOOPBACK)
+		mvpp2_port_loopback_set(port, state);
+}
+
+static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	u32 val;
+
+	netif_tx_stop_all_queues(dev);
+	netif_carrier_off(dev);
+	mvpp2_ingress_disable(port);
+	mvpp2_egress_disable(port);
+
+	mvpp2_port_disable(port);
+	mvpp2_interrupts_disable(port);
+
+	if (!phylink_autoneg_inband(mode)) {
+		val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+		val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+		val |= MVPP2_GMAC_FORCE_LINK_DOWN;
+		writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	}
+}
+
+static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+			      struct phy_device *phy)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	u32 val;
+
+	if (!phylink_autoneg_inband(mode)) {
+		val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+		val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
+		val |= MVPP2_GMAC_FORCE_LINK_PASS;
+		writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	}
+
+	mvpp2_interrupts_enable(port);
+	mvpp2_port_enable(port);
+
+	mvpp2_egress_enable(port);
+	mvpp2_ingress_enable(port);
+	netif_carrier_on(dev);
+	netif_tx_wake_all_queues(dev);
+}
+
+static const struct phylink_mac_ops mvpp2_phylink_ops = {
+	.validate = mvpp2_phylink_validate,
+	.mac_link_state = mvpp2_phylink_mac_link_state,
+	.mac_an_restart = mvpp2_mac_an_restart,
+	.mac_config = mvpp2_mac_config,
+	.mac_link_down = mvpp2_mac_link_down,
+	.mac_link_up = mvpp2_mac_link_up,
+};
+
 /* Ports initialization */
 static int mvpp2_port_probe(struct platform_device *pdev,
 			    struct device_node *port_node,
 			    struct mvpp2 *priv)
 {
-	struct device_node *phy_node;
 	struct phy *comphy;
 	struct mvpp2_port *port;
 	struct mvpp2_port_pcpu *port_pcpu;
 	struct net_device *dev;
 	struct resource *res;
+	struct phylink *phylink;
 	char *mac_from = "";
 	unsigned int ntxqs, nrxqs;
 	bool has_tx_irqs;
@@ -7526,7 +7610,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	if (!dev)
 		return -ENOMEM;
 
-	phy_node = of_parse_phandle(port_node, "phy", 0);
 	phy_mode = of_get_phy_mode(port_node);
 	if (phy_mode < 0) {
 		dev_err(&pdev->dev, "incorrect phy mode\n");
@@ -7565,15 +7648,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	if (err)
 		goto err_free_netdev;
 
-	port->link_irq = of_irq_get_byname(port_node, "link");
-	if (port->link_irq == -EPROBE_DEFER) {
-		err = -EPROBE_DEFER;
-		goto err_deinit_qvecs;
-	}
-	if (port->link_irq <= 0)
-		/* the link irq is optional */
-		port->link_irq = 0;
-
 	if (of_property_read_bool(port_node, "marvell,loopback"))
 		port->flags |= MVPP2_F_LOOPBACK;
 
@@ -7583,7 +7657,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	else
 		port->first_rxq = port->id * priv->max_port_rxqs;
 
-	port->phy_node = phy_node;
+	port->of_node = port_node;
 	port->phy_interface = phy_mode;
 	port->comphy = comphy;
 
@@ -7592,7 +7666,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 		port->base = devm_ioremap_resource(&pdev->dev, res);
 		if (IS_ERR(port->base)) {
 			err = PTR_ERR(port->base);
-			goto err_free_irq;
+			goto err_deinit_qvecs;
 		}
 	} else {
 		if (of_property_read_u32(port_node, "gop-port-id",
@@ -7609,7 +7683,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats);
 	if (!port->stats) {
 		err = -ENOMEM;
-		goto err_free_irq;
+		goto err_deinit_qvecs;
 	}
 
 	mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
@@ -7662,16 +7736,47 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	/* 9676 == 9700 - 20 and rounding to 8 */
 	dev->max_mtu = 9676;
 
+	/* The PHY node is optional. If not present the GoP link IRQ will be
+	 * used to handle link updates. Otherwise use phylink.
+	 */
+	if (of_find_property(port_node, "phy", NULL)) {
+		phylink = phylink_create(dev, port_node, phy_mode,
+					 &mvpp2_phylink_ops);
+		if (IS_ERR(phylink)) {
+			err = PTR_ERR(phylink);
+			goto err_free_port_pcpu;
+		}
+		port->phylink = phylink;
+		port->link_irq = 0;
+	} else {
+		port->phylink = NULL;
+		if (priv->hw_version == MVPP22) {
+			port->link_irq = of_irq_get_byname(port_node, "link");
+			if (port->link_irq == -EPROBE_DEFER) {
+				err = -EPROBE_DEFER;
+				goto err_free_port_pcpu;
+			}
+			if (port->link_irq <= 0)
+				/* the link irq is optional */
+				port->link_irq = 0;
+		}
+	}
+
 	err = register_netdev(dev);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register netdev\n");
-		goto err_free_port_pcpu;
+		goto err_phylink_irq;
 	}
 	netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr);
 
 	priv->port_list[id] = port;
 	return 0;
 
+err_phylink_irq:
+	if (port->phylink)
+		phylink_destroy(port->phylink);
+	else if (port->link_irq)
+		irq_dispose_mapping(port->link_irq);
 err_free_port_pcpu:
 	free_percpu(port->pcpu);
 err_free_txq_pcpu:
@@ -7679,13 +7784,9 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 		free_percpu(port->txqs[i]->pcpu);
 err_free_stats:
 	free_percpu(port->stats);
-err_free_irq:
-	if (port->link_irq)
-		irq_dispose_mapping(port->link_irq);
 err_deinit_qvecs:
 	mvpp2_queue_vectors_deinit(port);
 err_free_netdev:
-	of_node_put(phy_node);
 	free_netdev(dev);
 	return err;
 }
@@ -7696,7 +7797,8 @@ static void mvpp2_port_remove(struct mvpp2_port *port)
 	int i;
 
 	unregister_netdev(port->dev);
-	of_node_put(port->phy_node);
+	if (port->phylink)
+		phylink_destroy(port->phylink);
 	free_percpu(port->pcpu);
 	free_percpu(port->stats);
 	for (i = 0; i < port->ntxqs; i++)
-- 
2.13.5

^ permalink raw reply related

* Re: [PATCH net] bpf: one perf event close won't free bpf program attached by another perf event
From: Steven Rostedt @ 2017-09-21 14:02 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Yonghong Song, ast, daniel, netdev, kernel-team
In-Reply-To: <20170921111706.343om7252gcagco6@hirez.programming.kicks-ass.net>

On Thu, 21 Sep 2017 13:17:06 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> I suspect that would break a fair bunch of bpf proglets, since the data
> access to the trace data would be completely different, but it would be
> much nicer to not have this distinction based on event type.

Maybe this would be a good test to see if bpf is considered an ABI or
not.

-- Steve

^ permalink raw reply

* Re: [v2,1/3] can: m_can: Make hclk optional
From: Sekhar Nori @ 2017-09-21 14:08 UTC (permalink / raw)
  To: Franklin S Cooper Jr, linux-kernel, devicetree, netdev, linux-can,
	wg, mkl, robh+dt, quentin.schulz
  Cc: Kristo, Tero, Tony Lindgren, Linux OMAP List
In-Reply-To: <eb45fe94-c476-3919-603d-ff2a6b13487c@ti.com>

On Thursday 21 September 2017 06:01 AM, Franklin S Cooper Jr wrote:
> 
> 
> On 08/24/2017 03:00 AM, Sekhar Nori wrote:
>> + some OMAP folks and Linux OMAP list
>>
>> On Tuesday 25 July 2017 04:21 AM, Franklin Cooper wrote:
>>> Hclk is the MCAN's interface clock. However, for OMAP based devices such as
>>> DRA7 SoC family the interface clock is handled by hwmod. Therefore, this
>>> interface clock is managed by hwmod driver via pm_runtime_get and
>>> pm_runtime_put calls. Therefore, this interface clock isn't defined in DT
>>> and thus the driver shouldn't fail if this clock isn't found.
>>
>> I agree that hclk is defined as interface clock for M_CAN IP on DRA76x.
>>
>> However, there may be a need for the driver to know the value of hclk to
>> properly configure the RAM watchdog register which has a counter
>> counting down using hclk. Looks like the driver does not use the RAM
>> watchdog today. But if there is a need to configure it in future, it
>> might be a problem.
> 
> Honestly the RAM watchdog seems like a fundamental design problem.
> This RAM watchdog seems to be used in case a request to access the
> message ram is made but it hangs for what ever reason. Its even more
> complicated since the Message RAM is external to the MCAN IP so its
> implementation or how its handled probably differs from device to
> device. From example say you do have this error it isn't clear how you
> would recover from it. A logically answer would be to reset the entire
> IP but that also assumes that Message RAM will be reset along with the
> ip which likely depends on each SoC.
> 
> But if a readl/writel command hangs will the kernel eventually throw an
> error on its on or will the driver just hang? If it does hang can a
> driver in the ISR do something to properly terminate the driver or even
> recover from it?
>>
>> Is there a restriction in OMAP architecture against passing the
>> interface clock also in the 'clocks' property in DT. I have not tried it
>> myself, but wonder if you hit an issue that led to this patch.
> 
> No but not passing the interface clock is typical.

Okay, then it sounds like it will just be easier to pass the hclk too?

So it can be used if needed in future and also so that the driver can
stay the same as today.

Thanks,
Sekhar

^ permalink raw reply

* Re: RTL8192EE PCIe Wireless Network Adapter crashed with linux-4.13
From: Larry Finger @ 2017-09-21 14:21 UTC (permalink / raw)
  To: Zwindl
  Cc: linux-wireless@vger.kernel.org, chaoming_li@realsil.com.cn,
	kvalo@codeaurora.org, pkshih@realtek.com, johannes.berg@intel.com,
	gregkh@linuxfoundation.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <ZeTD57H3euqWFB5wrD_vYAkdu91l_6Yalbz_mRWtrrb2t17d0cdWOpCaXw33jcPYNKhgqnHbuqHTI9xkVnrkliRTGxraEqWnk5GZ63xKifI=@protonmail.com>

On 09/21/2017 06:37 AM, Zwindl wrote:
> Hi, I've reported to archlinux's bugzilla, and finally found out the flag which 
> caused that issue, it's the `CONFIG_INTEL_IOMMU_DEFAULT_ON=y` flag, I think may 
> this is a kernel bug, more details at https://bugs.archlinux.org/task/55665

My standard kernel has the following:

CONFIG_INTEL_IOMMU=y
# CONFIG_INTEL_IOMMU_SVM is not set
# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set

I will do some further testing to see if turning CONFIG_INTEL_IOMMU_DEFAULT_ON 
also breaks my system.

Larry

^ permalink raw reply

* Re: [PATCH net-next 2/2] net: dsa: lan9303: Add basic offloading of unicast traffic
From: Andrew Lunn @ 2017-09-21 14:21 UTC (permalink / raw)
  To: Egil Hjelmeland; +Cc: vivien.didelot, f.fainelli, netdev, linux-kernel
In-Reply-To: <20170921094139.4250-3-privat@egil-hjelmeland.no>

Hi Egil

> +static void lan9303_bridge_ports(struct lan9303 *chip)
> +{
> +	/* ports bridged: remove mirroring */
> +	lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_MIRROR, 0);
> +}

Could you replace the 0 with something symbolic which makes this
easier to understand.

#define LAN9303_SWE_PORT_MIRROR_DISABLED 0

> +
>  static int lan9303_handle_reset(struct lan9303 *chip)
>  {
>  	if (!chip->reset_gpio)
> @@ -844,6 +866,69 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port,
>  	}
>  }
>  
> +static int lan9303_port_bridge_join(struct dsa_switch *ds, int port,
> +				    struct net_device *br)
> +{
> +	struct lan9303 *chip = ds->priv;
> +
> +	dev_dbg(chip->dev, "%s(port %d)\n", __func__, port);
> +	if (ds->ports[1].bridge_dev ==  ds->ports[2].bridge_dev) {
> +		lan9303_bridge_ports(chip);
> +		chip->is_bridged = true;  /* unleash stp_state_set() */
> +	}
> +
> +	return 0;
> +}
> +
> +static void lan9303_port_bridge_leave(struct dsa_switch *ds, int port,
> +				      struct net_device *br)
> +{
> +	struct lan9303 *chip = ds->priv;
> +
> +	dev_dbg(chip->dev, "%s(port %d)\n", __func__, port);
> +	if (chip->is_bridged) {
> +		lan9303_separate_ports(chip);
> +		chip->is_bridged = false;
> +	}
> +}
> +
> +static void lan9303_port_stp_state_set(struct dsa_switch *ds, int port,
> +				       u8 state)
> +{
> +	int portmask, portstate;
> +	struct lan9303 *chip = ds->priv;
> +
> +	dev_dbg(chip->dev, "%s(port %d, state %d)\n",
> +		__func__, port, state);
> +	if (!chip->is_bridged)
> +		return; /* touching SWE_PORT_STATE will break port separation */

I'm wondering how this is supposed to work. Please add a good comment
here, since the hardware is forcing you to do something odd.

Maybe it would be a good idea to save the STP state in chip.  And then
when chip->is_bridged is set true, change the state in the hardware to
the saved value?

What happens when port 0 is added to the bridge, there is then a
minute pause and then port 1 is added? I would expect that as soon as
port 0 is added, the STP state machine for port 0 will start and move
into listening and then forwarding. Due to hardware limitations it
looks like you cannot do this. So what state is the hardware
effectively in? Blocking? Forwarding?

Then port 1 is added. You can then can respect the states. port 1 will
do blocking->listening->forwarding, but what about port 0? The calls
won't get repeated? How does it transition to forwarding?

  Andrew

> +
> +	switch (state) {
> +	case BR_STATE_DISABLED:
> +		portstate = LAN9303_SWE_PORT_STATE_DISABLED_PORT0;
> +		break;
> +	case BR_STATE_BLOCKING:
> +	case BR_STATE_LISTENING:
> +		portstate = LAN9303_SWE_PORT_STATE_BLOCKING_PORT0;
> +		break;
> +	case BR_STATE_LEARNING:
> +		portstate = LAN9303_SWE_PORT_STATE_LEARNING_PORT0;
> +		break;
> +	case BR_STATE_FORWARDING:
> +		portstate = LAN9303_SWE_PORT_STATE_FORWARDING_PORT0;
> +		break;
> +	default:
> +		portstate = LAN9303_SWE_PORT_STATE_DISABLED_PORT0;
> +		dev_err(chip->dev, "unknown stp state: port %d, state %d\n",
> +			port, state);
> +	}
> +
> +	portmask = 0x3 << (port * 2);
> +	portstate     <<= (port * 2);
> +	lan9303_write_switch_reg_mask(chip, LAN9303_SWE_PORT_STATE,
> +				      portstate, portmask);
> +}

^ permalink raw reply

* tg3 pxe weirdness
From: Berend De Schouwer @ 2017-09-21 14:23 UTC (permalink / raw)
  To: netdev

Hi,

I've got a machine with a Broadcom bcm5762c, using the tg3 driver, that
fails to receive network packets under some very specific conditions.

It works perfectly using a 1Gbps switch.  If, however, it first uses
PXE and then loads the Linux tg3 driver, and the switch is 100Mbps, it
no longer receives packets larger than ICMP.

It does do ARP and ping.

If it boots using PXE on a 1Gbps switch, boots into Linux, and then
it's plugged into 100 Mbps it also stops receiving packets.

mii-diag and dmesg confirm auto-negotiated speed and flow control, and
confirm temporary disconnect as the cables are moved.

PXE boots using UNDI, which then transfers a kernel using TFTP, which
transfers correctly.  The kernel boots, loads the tg3 driver, connects
the network.  Up to this point everything works.  Ping will work too. 
Any other network traffic fails.

Booting from a harddrive works fine.  I assume the UNDI driver
somewhere breaks auto-negotiation.  I've tried using mii-tool and
ethtool, but I haven't managed to make it work yet.

Is it possible to get negotiation working after PXE boot?  Are there
any tg3 driver flags that might make a difference?


Berend

^ permalink raw reply

* Re: [PATCH net 1/3] net: mvpp2: fix the dma_mask and coherent_dma_mask settings for PPv2.2
From: Antoine Tenart @ 2017-09-21 14:24 UTC (permalink / raw)
  To: David Miller
  Cc: antoine.tenart, andrew, gregory.clement, thomas.petazzoni,
	miquel.raynal, nadavh, linux, linux-kernel, mw, stefanc, netdev
In-Reply-To: <20170918.171858.667084185425779267.davem@davemloft.net>

Hi David,

On Mon, Sep 18, 2017 at 05:18:58PM -0700, David Miller wrote:
> From: Antoine Tenart <antoine.tenart@free-electrons.com>
> Date: Mon, 18 Sep 2017 15:04:06 +0200
> 
> > The dev->dma_mask usually points to dev->coherent_dma_mask. This is an
> > issue as setting both of them will override the other. This is
> > problematic here as the PPv2 driver uses a 32-bit-mask for coherent
> > accesses (txq, rxq, bm) and a 40-bit mask for all other accesses due to
> > an hardware limitation.
> > 
> > This can lead to a memory remap for all dma_map_single() calls when
> > dealing with memory above 4GB.
> > 
> > Fixes: 2067e0a13cfe ("net: mvpp2: set dma mask and coherent dma mask on PPv2.2")
> > Reported-by: Stefan Chulski <stefanc@marvell.com>
> > Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
> 
> I surrmise that if the platform has made dev->dma_mask point to
> &dev->coherent_dma_mask, it is because it does not allow the two
> settings to be set separately.

That's also the default when the platform does not allocate dma_mask.
You have a point, that could be because it's not supported. But I don't
know what would be a good check then.

> By rearranging the pointer, you are bypassing that, and probably
> breaking things or creating a situation that the DMA mapping
> layer is not expecting.
> 
> I want to know more about the situations where dma_mask is set to
> point to &coherent_dma_mask and how that is supposed to work.

>From what I see in other parts of the kernel, dma_mask points to
&coherent_dma_mask by default and having two different values for these
two masks isn't a use case for many drivers.

Antoine

-- 
Antoine Ténart, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply

* Re: [PATCH net-next 2/2] net: dsa: lan9303: Add basic offloading of unicast traffic
From: Vivien Didelot @ 2017-09-21 14:26 UTC (permalink / raw)
  To: Egil Hjelmeland, andrew, f.fainelli, netdev, linux-kernel; +Cc: Egil Hjelmeland
In-Reply-To: <20170921094139.4250-3-privat@egil-hjelmeland.no>

Hi Egil,

Egil Hjelmeland <privat@egil-hjelmeland.no> writes:

> When both user ports are joined to the same bridge, the normal
> HW MAC learning is enabled. This means that unicast traffic is forwarded
> in HW.
>
> If one of the user ports leave the bridge,
> the ports goes back to the initial separated operation.
>
> Port separation relies on disabled HW MAC learning. Hence the condition
> that both ports must join same bridge.
>
> Add brigde methods port_bridge_join, port_bridge_leave and
> port_stp_state_set.
>
> Signed-off-by: Egil Hjelmeland <privat@egil-hjelmeland.no>

Styling nitpicks below, other than that, the patch LGTM:

Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>

>  #include <linux/mutex.h>
>  #include <linux/mii.h>
>  #include <linux/phy.h>
> +#include <linux/if_bridge.h>

It's nice to order header inclusions alphabetically.

>  
>  #include "lan9303.h"
>  
> @@ -146,6 +147,7 @@
>  # define LAN9303_SWE_PORT_STATE_FORWARDING_PORT0 (0)
>  # define LAN9303_SWE_PORT_STATE_LEARNING_PORT0 BIT(1)
>  # define LAN9303_SWE_PORT_STATE_BLOCKING_PORT0 BIT(0)
> +# define LAN9303_SWE_PORT_STATE_DISABLED_PORT0 (3)
>  #define LAN9303_SWE_PORT_MIRROR 0x1846
>  # define LAN9303_SWE_PORT_MIRROR_SNIFF_ALL BIT(8)
>  # define LAN9303_SWE_PORT_MIRROR_SNIFFER_PORT2 BIT(7)
> @@ -431,6 +433,20 @@ static int lan9303_read_switch_reg(struct lan9303 *chip, u16 regnum, u32 *val)
>  	return ret;
>  }
>  
> +static int lan9303_write_switch_reg_mask(
> +	struct lan9303 *chip, u16 regnum, u32 val, u32 mask)

That is unlikely to respect the Kernel Coding Style. Please fill the
line as much as possible and align with the opening parenthesis:

    static int lan9303_write_switch_reg_mask(struct lan9303 *chip, u16 regnum,
                                             u32 val, u32 mask)

> +{
> +	int ret;
> +	u32 reg;
> +
> +	ret = lan9303_read_switch_reg(chip, regnum, &reg);
> +	if (ret)
> +		return ret;
> +	reg = (reg & ~mask) | val;
> +
> +	return lan9303_write_switch_reg(chip, regnum, reg);
> +}

...

> +
> +	portmask = 0x3 << (port * 2);
> +	portstate     <<= (port * 2);

Unnecessary alignment, please remove the extra space characters.

> +	lan9303_write_switch_reg_mask(chip, LAN9303_SWE_PORT_STATE,
> +				      portstate, portmask);
> +}
> +
>  static const struct dsa_switch_ops lan9303_switch_ops = {
>  	.get_tag_protocol = lan9303_get_tag_protocol,
>  	.setup = lan9303_setup,
> @@ -855,6 +940,9 @@ static const struct dsa_switch_ops lan9303_switch_ops = {
>  	.get_sset_count = lan9303_get_sset_count,
>  	.port_enable = lan9303_port_enable,
>  	.port_disable = lan9303_port_disable,
> +	.port_bridge_join       = lan9303_port_bridge_join,
> +	.port_bridge_leave      = lan9303_port_bridge_leave,
> +	.port_stp_state_set     = lan9303_port_stp_state_set,

Same here, alignment unnecessary, especially since the rest of the
structure does not do that.

>  };
>  
>  static int lan9303_register_switch(struct lan9303 *chip)
> diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h
> index 4d8be555ff4d..5be246f05965 100644
> --- a/drivers/net/dsa/lan9303.h
> +++ b/drivers/net/dsa/lan9303.h
> @@ -21,6 +21,7 @@ struct lan9303 {
>  	struct dsa_switch *ds;
>  	struct mutex indirect_mutex; /* protect indexed register access */
>  	const struct lan9303_phy_ops *ops;
> +	bool is_bridged; /* true if port 1 and 2 is bridged */
>  };
>  
>  extern const struct regmap_access_table lan9303_register_set;

Please use the checkpatch.pl script to ensure your patch respects the
kernel conventions before submitting, it can spot nice stuffs!

I use a Git alias(*) to check a commit which does basically this:

    git format-patch --stdout -1 | ./scripts/checkpatch.pl -


(*) in details, especially convenient during interactive rebases:

    $ git config alias.checkcommit '!f () { git format-patch --stdout -1 ${1:-HEAD} | ./scripts/checkpatch.pl - ; }; f'
    $ git checkcommit # i.e. current one
    $ git checkcommit HEAD^^
    $ git checkcommit d329ac88eb21
    ...


Thanks,

        Vivien

^ permalink raw reply

* Re: [PATCH net 2/4] net:ethernet:aquantia: Fix Tx queue hangups
From: Igor Russkikh @ 2017-09-21 14:31 UTC (permalink / raw)
  To: Yunsheng Lin, David S . Miller
  Cc: netdev, David Arcari, Pavel Belous, Nadezhda Krupnina,
	Simon Edelhaus
In-Reply-To: <024e7fb3-9b53-f4f2-3901-38a63c47d76c@huawei.com>

Thanks for the comments, Yunsheng,

>>  
>> +static int aq_nic_update_link_status(struct aq_nic_s *self)
>> +{
>> +	int err = self->aq_hw_ops.hw_get_link_status(self->aq_hw);
>> +
>> +	if (err < 0)
>> +		return -1;
> why not just return err?

Agreed, that could be improved.

>> +	if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps)
>> +		pr_info("%s: link change old %d new %d\n",
>> +			AQ_CFG_DRV_NAME, self->link_status.mbps,
>> +			self->aq_hw->aq_link_status.mbps);
> You has ndev in struct aq_nic_s *self, why not use netdev_*?

We are planning to introduce a generic rework commit separately and use netif_* set of macro's through all the driver's code.

-- 
BR, Igor

^ permalink raw reply

* Re: [PATCH net-next 1/2] net: dsa: lan9303: Move tag setup to new lan9303_setup_tagging
From: Vivien Didelot @ 2017-09-21 14:40 UTC (permalink / raw)
  To: Egil Hjelmeland, andrew, f.fainelli, netdev, linux-kernel; +Cc: Egil Hjelmeland
In-Reply-To: <20170921094139.4250-2-privat@egil-hjelmeland.no>

Hi Egil,

Egil Hjelmeland <privat@egil-hjelmeland.no> writes:

> Prepare for next patch:
> Move tag setup from lan9303_separate_ports() to new function
> lan9303_setup_tagging()
>
> Signed-off-by: Egil Hjelmeland <privat@egil-hjelmeland.no>

Minor styling issues, otherwise LGTM:

Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>

> +/* forward special tagged packets from port 0 to port 1 *or* port 2 */
> +static int lan9303_setup_tagging(struct lan9303 *chip)
> +{
> +	int ret;
> +	/* enable defining the destination port via special VLAN tagging
> +	 * for port 0
> +	 */
> +	ret = lan9303_write_switch_reg(chip, LAN9303_SWE_INGRESS_PORT_TYPE,
> +				       LAN9303_SWE_INGRESS_PORT_TYPE_VLAN);
> +	if (ret)
> +		return ret;
> +
> +	/* tag incoming packets at port 1 and 2 on their way to port 0 to be
> +	 * able to discover their source port
> +	 */
> +	return lan9303_write_switch_reg(
> +		chip, LAN9303_BM_EGRSS_PORT_TYPE,
> +		LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT0);

Please avoid this kind of alignment as much as possible.

    u32 val = LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT0;

would do the trick for the +80 chars issue.

BTW, it'd be great to see sometime soon a cleanup patch which makes use
of such temporary u32 val variable for most of the
lan9303_write_switch_reg and lan9303_write_switch_port calls. ;-)


Thanks,

        Vivien

^ permalink raw reply

* Re: RTL8192EE PCIe Wireless Network Adapter crashed with linux-4.13
From: Kalle Valo @ 2017-09-21 14:49 UTC (permalink / raw)
  To: Larry Finger
  Cc: Zwindl, linux-wireless@vger.kernel.org,
	chaoming_li@realsil.com.cn, pkshih@realtek.com,
	johannes.berg@intel.com, gregkh@linuxfoundation.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <5a6aa431-fdde-a329-4168-44490c37f197@lwfinger.net>

Larry Finger <Larry.Finger@lwfinger.net> writes:

> On 09/21/2017 06:37 AM, Zwindl wrote:
>> Hi, I've reported to archlinux's bugzilla, and finally found out the
>> flag which caused that issue, it's the
>> `CONFIG_INTEL_IOMMU_DEFAULT_ON=y` flag, I think may this is a kernel
>> bug, more details at https://bugs.archlinux.org/task/55665
>
> My standard kernel has the following:
>
> CONFIG_INTEL_IOMMU=y
> # CONFIG_INTEL_IOMMU_SVM is not set
> # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
>
> I will do some further testing to see if turning
> CONFIG_INTEL_IOMMU_DEFAULT_ON also breaks my system.

But not all systems have iommu so check from dmesg that iommu is really
enabled.

-- 
Kalle Valo

^ permalink raw reply

* [PATCH net-next] net: vrf: remove skb_dst_force() after skb_dst_set()
From: Eric Dumazet @ 2017-09-21 14:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, David Ahern, Shrijeet Mukherjee

From: Eric Dumazet <edumazet@google.com>

skb_dst_set(skb, dst) installs a normal (refcounted) dst, there is no
point using skb_dst_force(skb)

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 drivers/net/vrf.c |    1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 9b243e6f3008bb5319844412cd49db1cd7bce594..cc18b7b11612d16271a1dbbb2d55c8f61781b9be 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -132,7 +132,6 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
 	skb_orphan(skb);
 
 	skb_dst_set(skb, dst);
-	skb_dst_force(skb);
 
 	/* set pkt_type to avoid skb hitting packet taps twice -
 	 * once on Tx and again in Rx processing

^ permalink raw reply related

* Re: [PATCH net-next] net: vrf: remove skb_dst_force() after skb_dst_set()
From: David Ahern @ 2017-09-21 14:52 UTC (permalink / raw)
  To: Eric Dumazet, David Miller; +Cc: netdev, Shrijeet Mukherjee
In-Reply-To: <1506005428.29839.129.camel@edumazet-glaptop3.roam.corp.google.com>

On 9/21/17 8:50 AM, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> skb_dst_set(skb, dst) installs a normal (refcounted) dst, there is no
> point using skb_dst_force(skb)
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  drivers/net/vrf.c |    1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
> index 9b243e6f3008bb5319844412cd49db1cd7bce594..cc18b7b11612d16271a1dbbb2d55c8f61781b9be 100644
> --- a/drivers/net/vrf.c
> +++ b/drivers/net/vrf.c
> @@ -132,7 +132,6 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
>  	skb_orphan(skb);
>  
>  	skb_dst_set(skb, dst);
> -	skb_dst_force(skb);
>  
>  	/* set pkt_type to avoid skb hitting packet taps twice -
>  	 * once on Tx and again in Rx processing
> 
> 

Acked-by: David Ahern <dsa@cumulusnetworks.com>

^ permalink raw reply

* Re: Latest net-next from GIT panic
From: Eric Dumazet @ 2017-09-21 14:56 UTC (permalink / raw)
  To: Paweł Staszewski
  Cc: Wei Wang, Cong Wang, Linux Kernel Network Developers,
	Eric Dumazet
In-Reply-To: <80e8948e-669b-4139-add6-637c6dd3405f@itcare.pl>

On Thu, 2017-09-21 at 15:18 +0200, Paweł Staszewski wrote:

> ok after adding patch all is working from now for about 1 hour of normal 
> traffic witc all bgp sessions connected and about 600k prefixes in kernel.


Great, I am doing to submit an official patch, uniting skb_dst_force()
and skb_dst_force_safe() into a single helper.

Thanks.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox