Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH net-next] liquidio: Add spoof checking on a VF MAC address
From: David Miller @ 2018-09-06 22:52 UTC (permalink / raw)
  To: felix.manlunas
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	weilin.chang
In-Reply-To: <20180906014056.GA2159@felix-thinkpad.cavium.com>

From: Felix Manlunas <felix.manlunas@cavium.com>
Date: Wed, 5 Sep 2018 18:40:56 -0700

> From: Weilin Chang <weilin.chang@cavium.com>
> 
> 1. Provide the API to set/unset the spoof checking feature.
> 2. Add a function to periodically provide the count of found
>    packets with spoof VF MAC address.
> 3. Prevent VF MAC address changing while the spoofchk of the VF is
>    on unless the changing MAC address is issued from PF.
> 
> Signed-off-by: Weilin Chang <weilin.chang@cavium.com>
> Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH net-next 06/11] tuntap: split out XDP logic
From: Jason Wang @ 2018-09-07  3:29 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20180906131703-mutt-send-email-mst@kernel.org>



On 2018年09月07日 01:21, Michael S. Tsirkin wrote:
> On Thu, Sep 06, 2018 at 12:05:21PM +0800, Jason Wang wrote:
>> This patch split out XDP logic into a single function. This make it to
>> be reused by XDP batching path in the following patch.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>   drivers/net/tun.c | 84 ++++++++++++++++++++++++++++-------------------
>>   1 file changed, 51 insertions(+), 33 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 389aa0727cc6..21b125020b3b 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -1635,6 +1635,44 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
>>   	return true;
>>   }
>>   
>> +static u32 tun_do_xdp(struct tun_struct *tun,
>> +		      struct tun_file *tfile,
>> +		      struct bpf_prog *xdp_prog,
>> +		      struct xdp_buff *xdp,
>> +		      int *err)
>> +{
>> +	u32 act = bpf_prog_run_xdp(xdp_prog, xdp);
>> +
>> +	switch (act) {
>> +	case XDP_REDIRECT:
>> +		*err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
>> +		xdp_do_flush_map();
>> +		if (*err)
>> +			break;
>> +		goto out;
>> +	case XDP_TX:
>> +		*err = tun_xdp_tx(tun->dev, xdp);
>> +		if (*err < 0)
>> +			break;
>> +		*err = 0;
>> +		goto out;
>> +	case XDP_PASS:
>> +		goto out;
> Do we need goto? why not just return?

I don't see any difference.

>
>> +	default:
>> +		bpf_warn_invalid_xdp_action(act);
>> +		/* fall through */
>> +	case XDP_ABORTED:
>> +		trace_xdp_exception(tun->dev, xdp_prog, act);
>> +		/* fall through */
>> +	case XDP_DROP:
>> +		break;
>> +	}
>> +
>> +	put_page(virt_to_head_page(xdp->data_hard_start));
> put here because caller does get_page :( Not pretty.
> I'd move this out to the caller.

Then we need a switch in the caller, not sure it's better.

>
>> +out:
>> +	return act;
> How about combining err and act? err is < 0 XDP_PASS is > 0.
> No need for pointers then.

Ok.

>
>> +}
>> +
>>   static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   				     struct tun_file *tfile,
>>   				     struct iov_iter *from,
>> @@ -1645,10 +1683,10 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   	struct sk_buff *skb = NULL;
>>   	struct bpf_prog *xdp_prog;
>>   	int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>> -	unsigned int delta = 0;
>>   	char *buf;
>>   	size_t copied;
>> -	int err, pad = TUN_RX_PAD;
>> +	int pad = TUN_RX_PAD;
>> +	int err = 0;
>>   
>>   	rcu_read_lock();
>>   	xdp_prog = rcu_dereference(tun->xdp_prog);
>> @@ -1685,9 +1723,8 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   	local_bh_disable();
>>   	rcu_read_lock();
>>   	xdp_prog = rcu_dereference(tun->xdp_prog);
>> -	if (xdp_prog && !*skb_xdp) {
>> +	if (xdp_prog) {
>>   		struct xdp_buff xdp;
>> -		void *orig_data;
>>   		u32 act;
>>   
>>   		xdp.data_hard_start = buf;
>> @@ -1695,33 +1732,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   		xdp_set_data_meta_invalid(&xdp);
>>   		xdp.data_end = xdp.data + len;
>>   		xdp.rxq = &tfile->xdp_rxq;
>> -		orig_data = xdp.data;
>> -		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>> -
>> -		switch (act) {
>> -		case XDP_REDIRECT:
>> -			err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
>> -			xdp_do_flush_map();
>> -			if (err)
>> -				goto err_xdp;
>> -			goto out;
>> -		case XDP_TX:
>> -			if (tun_xdp_tx(tun->dev, &xdp) < 0)
>> -				goto err_xdp;
>> -			goto out;
>> -		case XDP_PASS:
>> -			delta = orig_data - xdp.data;
>> -			len = xdp.data_end - xdp.data;
>> -			break;
>> -		default:
>> -			bpf_warn_invalid_xdp_action(act);
>> -			/* fall through */
>> -		case XDP_ABORTED:
>> -			trace_xdp_exception(tun->dev, xdp_prog, act);
>> -			/* fall through */
>> -		case XDP_DROP:
>> +		act = tun_do_xdp(tun, tfile, xdp_prog, &xdp, &err);
>> +		if (err)
>>   			goto err_xdp;
>> -		}
>> +		if (act != XDP_PASS)
>> +			goto out;
> likely?

It depends on the XDP program, so I tend not to use it.

>
>> +
>> +		pad = xdp.data - xdp.data_hard_start;
>> +		len = xdp.data_end - xdp.data;
>>   	}
>>   	rcu_read_unlock();
>>   	local_bh_enable();
>> @@ -1729,18 +1747,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   build:
>>   	skb = build_skb(buf, buflen);
>>   	if (!skb) {
>> +		put_page(alloc_frag->page);
>>   		skb = ERR_PTR(-ENOMEM);
>>   		goto out;
>>   	}
>>   
>> -	skb_reserve(skb, pad - delta);
>> +	skb_reserve(skb, pad);
>>   	skb_put(skb, len);
>>   
>>   	return skb;
>>   
>>   err_xdp:
>> -	alloc_frag->offset -= buflen;
>> -	put_page(alloc_frag->page);
>> +	this_cpu_inc(tun->pcpu_stats->rx_dropped);
>
> This fixes bug in previous patch which dropped it. OK :)

Yes, but let me move this to the buggy patch.

Thanks

>>   out:
>>   	rcu_read_unlock();
>>   	local_bh_enable();
>> -- 
>> 2.17.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [pull request][net-next 0/9] Mellanox, mlx5 ethernet updates 2018-09-05
From: David Miller @ 2018-09-06 22:50 UTC (permalink / raw)
  To: saeedm; +Cc: netdev
In-Reply-To: <20180906043331.31958-1-saeedm@mellanox.com>

From: Saeed Mahameed <saeedm@mellanox.com>
Date: Wed,  5 Sep 2018 21:33:22 -0700

> This pull request provides some updates to mlx5 ethernet driver.
> 
> For more information please see tag log below.
> 
> Please pull and let me know if there's any problem.

Pulled, thank you.

^ permalink raw reply

* Re: [PATCH net-next 05/11] tuntap: tweak on the path of non-xdp case in tun_build_skb()
From: Jason Wang @ 2018-09-07  3:24 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20180906131509-mutt-send-email-mst@kernel.org>



On 2018年09月07日 01:16, Michael S. Tsirkin wrote:
> On Thu, Sep 06, 2018 at 12:05:20PM +0800, Jason Wang wrote:
>> If we're sure not to go native XDP, there's no need for several things
>> like bh and rcu stuffs.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> True...
>
>> ---
>>   drivers/net/tun.c | 9 ++++++---
>>   1 file changed, 6 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index f8cdcfa392c3..389aa0727cc6 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -1675,10 +1675,12 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   	 * of xdp_prog above, this should be rare and for simplicity
>>   	 * we do XDP on skb in case the headroom is not enough.
>>   	 */
>> -	if (hdr->gso_type || !xdp_prog)
>> +	if (hdr->gso_type || !xdp_prog) {
>>   		*skb_xdp = 1;
>> -	else
>> -		*skb_xdp = 0;
>> +		goto build;
>> +	}
>> +
>> +	*skb_xdp = 0;
>>   
>>   	local_bh_disable();
>>   	rcu_read_lock();
>> @@ -1724,6 +1726,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   	rcu_read_unlock();
>>   	local_bh_enable();
>>   
>> +build:
> But this is spaghetti code. Please just put common
> code into functions and call them, don't goto.

Ok, will do this.

Thanks

>
>>   	skb = build_skb(buf, buflen);
>>   	if (!skb) {
>>   		skb = ERR_PTR(-ENOMEM);
>> -- 
>> 2.17.1

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next 04/11] tuntap: simplify error handling in tun_build_skb()
From: Jason Wang @ 2018-09-07  3:22 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20180906130425-mutt-send-email-mst@kernel.org>



On 2018年09月07日 01:14, Michael S. Tsirkin wrote:
> On Thu, Sep 06, 2018 at 12:05:19PM +0800, Jason Wang wrote:
>> There's no need to duplicate page get logic in each action. So this
>> patch tries to get page and calculate the offset before processing XDP
>> actions, and undo them when meet errors (we don't care the performance
>> on errors). This will be used for factoring out XDP logic.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> I see some issues with this one.
>
>> ---
>>   drivers/net/tun.c | 37 ++++++++++++++++---------------------
>>   1 file changed, 16 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 372caf7d67d9..f8cdcfa392c3 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -1642,7 +1642,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   				     int len, int *skb_xdp)
>>   {
>>   	struct page_frag *alloc_frag = &current->task_frag;
>> -	struct sk_buff *skb;
>> +	struct sk_buff *skb = NULL;
>>   	struct bpf_prog *xdp_prog;
>>   	int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>>   	unsigned int delta = 0;
>> @@ -1668,6 +1668,9 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   	if (copied != len)
>>   		return ERR_PTR(-EFAULT);
>>   
>> +	get_page(alloc_frag->page);
>> +	alloc_frag->offset += buflen;
>> +
> This adds an atomic op on XDP_DROP which is a data path
> operation for some workloads.

Yes, I have patch on top to amortize this, the idea is to have a very 
big refcount once after the frag was allocated and maintain a bias and 
decrease them all when allocating new frags.'

>
>>   	/* There's a small window that XDP may be set after the check
>>   	 * of xdp_prog above, this should be rare and for simplicity
>>   	 * we do XDP on skb in case the headroom is not enough.
>> @@ -1695,23 +1698,15 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   
>>   		switch (act) {
>>   		case XDP_REDIRECT:
>> -			get_page(alloc_frag->page);
>> -			alloc_frag->offset += buflen;
>>   			err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
>>   			xdp_do_flush_map();
>>   			if (err)
>> -				goto err_redirect;
>> -			rcu_read_unlock();
>> -			local_bh_enable();
>> -			return NULL;
>> +				goto err_xdp;
>> +			goto out;
>>   		case XDP_TX:
>> -			get_page(alloc_frag->page);
>> -			alloc_frag->offset += buflen;
>>   			if (tun_xdp_tx(tun->dev, &xdp) < 0)
>> -				goto err_redirect;
>> -			rcu_read_unlock();
>> -			local_bh_enable();
>> -			return NULL;
>> +				goto err_xdp;
>> +			goto out;
>>   		case XDP_PASS:
>>   			delta = orig_data - xdp.data;
>>   			len = xdp.data_end - xdp.data;
>> @@ -1730,23 +1725,23 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   	local_bh_enable();
>>   
>>   	skb = build_skb(buf, buflen);
>> -	if (!skb)
>> -		return ERR_PTR(-ENOMEM);
>> +	if (!skb) {
>> +		skb = ERR_PTR(-ENOMEM);
>> +		goto out;
> So goto out will skip put_page, and we did
> do get_page above. Seems wrong. You should
> goto err_skb or something like this.

Yes, looks like err_xdp.

>
>
>> +	}
>>   
>>   	skb_reserve(skb, pad - delta);
>>   	skb_put(skb, len);
>> -	get_page(alloc_frag->page);
>> -	alloc_frag->offset += buflen;
>>   
>>   	return skb;
>>   
>> -err_redirect:
>> -	put_page(alloc_frag->page);
>>   err_xdp:
>> +	alloc_frag->offset -= buflen;
>> +	put_page(alloc_frag->page);
>> +out:
> Out here isn't an error at all, is it?  You should not mix return and
> error handling IMHO.

If you mean the name, I can rename the label to "drop".

>
>
>
>>   	rcu_read_unlock();
>>   	local_bh_enable();
>> -	this_cpu_inc(tun->pcpu_stats->rx_dropped);
> Doesn't this break rx_dropped accounting?

Let me fix this.

Thanks

>> -	return NULL;
>> +	return skb;
>>   }
>>   
>>   /* Get packet from user space buffer */
>> -- 
>> 2.17.1

^ permalink raw reply

* Re: [PATCH net-next 02/11] tuntap: switch to use XDP_PACKET_HEADROOM
From: Jason Wang @ 2018-09-07  3:12 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20180906125718-mutt-send-email-mst@kernel.org>



On 2018年09月07日 00:57, Michael S. Tsirkin wrote:
> On Thu, Sep 06, 2018 at 12:05:17PM +0800, Jason Wang wrote:
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> Acked-by: Michael S. Tsirkin <mst@redhat.com>
>
> any idea why didn't we do this straight away?

Dunno, but git grep told me not all XDP capable driver use this (e.g 
virtio_net has its own value).

Anyway, I think it's ok for driver to have their specific value if they 
can make sure the value is equal or greater than XDP_PACKET_HEADROOM.

Thanks

>
>> ---
>>   drivers/net/tun.c | 3 +--
>>   1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 2c548bd20393..d3677a544b56 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -113,7 +113,6 @@ do {								\
>>   } while (0)
>>   #endif
>>   
>> -#define TUN_HEADROOM 256
>>   #define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
>>   
>>   /* TUN device flags */
>> @@ -1654,7 +1653,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>   	rcu_read_lock();
>>   	xdp_prog = rcu_dereference(tun->xdp_prog);
>>   	if (xdp_prog)
>> -		pad += TUN_HEADROOM;
>> +		pad += XDP_PACKET_HEADROOM;
>>   	buflen += SKB_DATA_ALIGN(len + pad);
>>   	rcu_read_unlock();
>>   
>> -- 
>> 2.17.1

^ permalink raw reply

* Re: [PATCH net-next 01/11] net: sock: introduce SOCK_XDP
From: Jason Wang @ 2018-09-07  3:07 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20180906125615-mutt-send-email-mst@kernel.org>



On 2018年09月07日 00:56, Michael S. Tsirkin wrote:
> On Thu, Sep 06, 2018 at 12:05:16PM +0800, Jason Wang wrote:
>> This patch introduces a new sock flag - SOCK_XDP. This will be used
>> for notifying the upper layer that XDP program is attached on the
>> lower socket, and requires for extra headroom.
>>
>> TUN will be the first user.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> In fact vhost is the 1st user, right? So this can be
> pushed out to become patch 10/11.

Better with an independent patch, since patch 10/11 can work without XDP.

Thanks

>
>> ---
>>   drivers/net/tun.c  | 19 +++++++++++++++++++
>>   include/net/sock.h |  1 +
>>   2 files changed, 20 insertions(+)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index ebd07ad82431..2c548bd20393 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -869,6 +869,9 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
>>   		tun_napi_init(tun, tfile, napi);
>>   	}
>>   
>> +	if (rtnl_dereference(tun->xdp_prog))
>> +		sock_set_flag(&tfile->sk, SOCK_XDP);
>> +
>>   	tun_set_real_num_queues(tun);
>>   
>>   	/* device is allowed to go away first, so no need to hold extra
>> @@ -1241,13 +1244,29 @@ static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
>>   		       struct netlink_ext_ack *extack)
>>   {
>>   	struct tun_struct *tun = netdev_priv(dev);
>> +	struct tun_file *tfile;
>>   	struct bpf_prog *old_prog;
>> +	int i;
>>   
>>   	old_prog = rtnl_dereference(tun->xdp_prog);
>>   	rcu_assign_pointer(tun->xdp_prog, prog);
>>   	if (old_prog)
>>   		bpf_prog_put(old_prog);
>>   
>> +	for (i = 0; i < tun->numqueues; i++) {
>> +		tfile = rtnl_dereference(tun->tfiles[i]);
>> +		if (prog)
>> +			sock_set_flag(&tfile->sk, SOCK_XDP);
>> +		else
>> +			sock_reset_flag(&tfile->sk, SOCK_XDP);
>> +	}
>> +	list_for_each_entry(tfile, &tun->disabled, next) {
>> +		if (prog)
>> +			sock_set_flag(&tfile->sk, SOCK_XDP);
>> +		else
>> +			sock_reset_flag(&tfile->sk, SOCK_XDP);
>> +	}
>> +
>>   	return 0;
>>   }
>>   
>> diff --git a/include/net/sock.h b/include/net/sock.h
>> index 433f45fc2d68..38cae35f6e16 100644
>> --- a/include/net/sock.h
>> +++ b/include/net/sock.h
>> @@ -800,6 +800,7 @@ enum sock_flags {
>>   	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
>>   	SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
>>   	SOCK_TXTIME,
>> +	SOCK_XDP, /* XDP is attached */
>>   };
>>   
>>   #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
>> -- 
>> 2.17.1

^ permalink raw reply

* [PATCH] net: wireless: mediatek: fix mt76 LEDS build error
From: Randy Dunlap @ 2018-09-06 22:28 UTC (permalink / raw)
  To: linux-wireless, Kalle Valo; +Cc: Felix Fietkau, netdev@vger.kernel.org

From: Randy Dunlap <rdunlap@infradead.org>

All of the mt76 driver options use its mac80211.o component,
which uses led interfaces, so each of them should depend on
LEDS_CLASS.

Fixes this build error:

drivers/net/wireless/mediatek/mt76/mac80211.o: In function `mt76_led_init':
drivers/net/wireless/mediatek/mt76/mac80211.c:119: undefined reference to `devm_of_led_classdev_register'

Fixes: 17f1de56df05 ("mt76: add common code shared between multiple chipsets")

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Felix Fietkau <nbd@nbd.name>
Cc: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/Kconfig |    3 +++
 1 file changed, 3 insertions(+)

--- linux-next-20180906.orig/drivers/net/wireless/mediatek/mt76/Kconfig
+++ linux-next-20180906/drivers/net/wireless/mediatek/mt76/Kconfig
@@ -18,6 +18,7 @@ config MT76x0U
 	tristate "MediaTek MT76x0U (USB) support"
 	select MT76_CORE
 	depends on MAC80211
+	depends on LEDS_CLASS
 	depends on USB
 	select MT76x02_LIB
 	help
@@ -28,6 +29,7 @@ config MT76x2E
 	select MT76_CORE
 	select MT76x2_COMMON
 	depends on MAC80211
+	depends on LEDS_CLASS
 	depends on PCI
 	---help---
 	  This adds support for MT7612/MT7602/MT7662-based wireless PCIe devices.
@@ -38,6 +40,7 @@ config MT76x2U
 	select MT76_USB
 	select MT76x2_COMMON
 	depends on MAC80211
+	depends on LEDS_CLASS
 	depends on USB
 	help
 	  This adds support for MT7612U-based wireless USB dongles.

^ permalink raw reply

* Re: [PATCH 1/7] fix hnode refcounting
From: Al Viro @ 2018-09-07  2:35 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: netdev, Cong Wang, Jiri Pirko, stable
In-Reply-To: <3bd95332-a12e-6226-8ac3-61e88b0f3cfd@mojatatu.com>

On Thu, Sep 06, 2018 at 06:21:09AM -0400, Jamal Hadi Salim wrote:

> For networking patches, subject should be reflective of tree and
> subsystem. Example for this one:
> "[PATCH net 1/7]:net: sched: cls_u32: fix hnode refcounting"
> Also useful to have a cover letter summarizing the patchset
> in 0/7. Otherwise
> 
> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>

Argh...  Unfortunately, there's this: in u32_delete() we have
        if (root_ht) {
                if (root_ht->refcnt > 1) {
                        *last = false;
                        goto ret;
                }
                if (root_ht->refcnt == 1) {
                        if (!ht_empty(root_ht)) {
                                *last = false;
                                goto ret;
                        }
                }
        }
and that would need to be updated.  However, that logics is bloody odd
to start with.  First of all, root_ht has come from
       struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
and the only place where it's ever modified is
        rcu_assign_pointer(tp->root, root_ht);
in u32_init(), where we'd bloody well checked that root_ht is non-NULL
(see
        if (root_ht == NULL)
                return -ENOBUFS;
upstream of that place) and where that assignment is inevitable on the
way to returning 0.  No matter what, if tp has passed u32_init() it
will have non-NULL ->root, forever.  And there is no way for tcf_proto
to be seen outside of tcf_proto_create() without ->init() having returned
0 - it gets freed before anyone sees it.

So this 'if (root_ht)' can't be false.  What's more, what the hell is the
whole thing checking?  We are in u32_delete().  It's called (as ->delete())
from tfilter_del_notify(), which is called from tc_del_tfilter().  If we
return 0 with *last true, we follow up calling tcf_proto_destroy().
OK, let's look at the logics in there:
	* if there are links to root hnode => false
	* if there's no links to root hnode and it has knodes => false
(BTW, if we ever get there with root_ht->refcnt < 1, we are obviously screwed)
	* if there is a tcf_proto sharing tp->data => false (i.e. any filters
with different prio - don't bother)
	* if tp is the only one with reference to tp->data and there are *any*
knodes => false.

Any extra links can come only from knodes in a non-empty hnode.  And it's not
a common case.  Shouldn't thIe whole thing be
	* shared tp->data => false
	* any non-empty hnode => false
instead?  Perhaps even with the knode counter in tp->data, avoiding any loops
in there, as well as the entire ht_empty()...

Now, in the very beginning of u32_delete() we have this:
        struct tc_u_hnode *ht = arg;
	
        if (ht == NULL)
                goto out;
OK, but the call of ->delete() is
        err = tp->ops->delete(tp, fh, last, extack);
and arg == NULL seen in u32_delete() means fh == NULL in tfilter_del_notify().
Which is called in
        if (!fh) {
		...
	} else {
                bool last;

                err = tfilter_del_notify(net, skb, n, tp, block,
                                         q, parent, fh, false, &last,
                                         extack);
How can we ever get there with NULL fh?

The whole thing makes very little sense; looks like it used to live in
u32_destroy() prior to commit 763dbf6328e41 ("net_sched: move the empty tp
check from ->destroy() to ->delete()"), but looking at the rationale in
that commit...  I don't see how it fixes anything - sure, now we remove
tcf_proto from the list before calling ->destroy().  Without any RCU delays
in between.  How could it possibly solve any issues with ->classify()
called in parallel with ->destroy()?  cls_u32 (at least these days)
does try to survive u32_destroy() in parallel with u32_classify();
if any other classifiers do not, they are still broken and that commit
has not done anything for them.

Anyway, adjusting 1/7 for that is trivial, but I would really like to
understand what that code is doing...  Comments?

^ permalink raw reply

* [Patch net] net_sched: properly cancel netlink dump on failure
From: Cong Wang @ 2018-09-06 21:50 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang, Davide Caratti, Simon Horman

When nla_put*() fails after nla_nest_start(), we need
to call nla_nest_cancel() to cancel the message, otherwise
we end up calling nla_nest_end() like a success.

Fixes: 0ed5269f9e41 ("net/sched: add tunnel option support to act_tunnel_key")
Cc: Davide Caratti <dcaratti@redhat.com>
Cc: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 net/sched/act_tunnel_key.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 28d58bbc953e..681f6f04e7da 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -412,8 +412,10 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
 		    nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,
 			       opt->type) ||
 		    nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,
-			    opt->length * 4, opt + 1))
+			    opt->length * 4, opt + 1)) {
+			nla_nest_cancel(skb, start);
 			return -EMSGSIZE;
+		}
 
 		len -= sizeof(struct geneve_opt) + opt->length * 4;
 		src += sizeof(struct geneve_opt) + opt->length * 4;
@@ -427,7 +429,7 @@ static int tunnel_key_opts_dump(struct sk_buff *skb,
 				const struct ip_tunnel_info *info)
 {
 	struct nlattr *start;
-	int err;
+	int err = -EINVAL;
 
 	if (!info->options_len)
 		return 0;
@@ -439,9 +441,11 @@ static int tunnel_key_opts_dump(struct sk_buff *skb,
 	if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
 		err = tunnel_key_geneve_opts_dump(skb, info);
 		if (err)
-			return err;
+			goto err_out;
 	} else {
-		return -EINVAL;
+err_out:
+		nla_nest_cancel(skb, start);
+		return err;
 	}
 
 	nla_nest_end(skb, start);
-- 
2.14.4

^ permalink raw reply related

* Re: [PATCH v2 net-next 7/7] net: dsa: Add Lantiq / Intel DSA driver for vrx200
From: Florian Fainelli @ 2018-09-06 21:36 UTC (permalink / raw)
  To: Hauke Mehrtens, davem
  Cc: netdev, andrew, vivien.didelot, john, linux-mips, dev,
	hauke.mehrtens, devicetree
In-Reply-To: <d0da3eb2-8adb-677a-0f88-b6fe7989ae45@hauke-m.de>

On 09/06/2018 02:11 PM, Hauke Mehrtens wrote:
> On 09/03/2018 09:54 PM, Florian Fainelli wrote:
>>
>>
>> On 9/1/2018 5:05 AM, Hauke Mehrtens wrote:
>>> This adds the DSA driver for the GSWIP Switch found in the VRX200 SoC.
>>> This switch is integrated in the DSL SoC, this SoC uses a GSWIP version
>>> 2.1, there are other SoCs using different versions of this IP block, but
>>> this driver was only tested with the version found in the VRX200.
>>> Currently only the basic features are implemented which will forward all
>>> packages to the CPU and let the CPU do the forwarding. The hardware also
>>> support Layer 2 offloading which is not yet implemented in this driver.
>>>
>>> The GPHY FW loaded is now done by this driver and not any more by the
>>> separate driver in drivers/soc/lantiq/gphy.c, I will remove this driver
>>> is a separate patch. to make use of the GPHY this switch driver is
>>> needed anyway. Other SoCs have more embedded GPHYs so this driver should
>>> support a variable number of GPHYs. After the firmware was loaded the
>>> GPHY can be probed on the MDIO bus and it behaves like an external GPHY,
>>> without the firmware it can not be probed on the MDIO bus.
>>>
>>> Currently this depends on SOC_TYPE_XWAY because the SoC revision
>>> detection function ltq_soc_type() is used to load the correct GPHY
>>> firmware on the VRX200 SoCs.
>>>
>>> The clock names in the sysctrl.c file have to be changed because the
>>> clocks are now used by a different driver. This should be cleaned up and
>>> a real common clock driver should provide the clocks instead.
>>>
>>> Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
>>> ---
>>
>> Looks great, just a few suggestions below
>>
>> [snip]
>>
>>> +static void gswip_adjust_link(struct dsa_switch *ds, int port,
>>> +                  struct phy_device *phydev)
>>> +{
>>> +    struct gswip_priv *priv = ds->priv;
>>> +    u16 macconf = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK;
>>> +    u16 miirate = 0;
>>> +    u16 miimode;
>>> +    u16 lcl_adv = 0, rmt_adv = 0;
>>> +    u8 flowctrl;
>>> +
>>> +    /* do not run this for the CPU port */
>>> +    if (dsa_is_cpu_port(ds, port))
>>> +        return;
>>
>> Typically we expect the adjust_link callback to run for fixed link
>> ports, that is inter-switch links (between switches) or between the CPU
>> port and the Ethernet MAC attached to the switch. Here you are running
>> this for the user facing ports (IIRC), which should really not be
>> necessary, most Ethernet switches will be able to look at their built-in
>> PHY's state and configure the switch's port automatically. Maybe this is
>> not possible here because you had to disable polling?
> 
> I deactivated the PHY auto polling, I can activate it again. Some PHYs
> could also be external on the same MDIO bus as the internal PHYs.
> 
> The CPU facing fixed link is a special MAC in the switch, at least in
> this version of the switch IP which is embedded in the networking SoCs.
> The MAC is more or less integrated in the switch and the driver can not
> configure the link between the MAC and the switch.

OK

> 
>> Can you consider implementing PHYLINK operations which would make the
>> driver more future proof, should you consider newer generations of
>> switches that support 10G PHY, SGMII, SFP/SFF and so on?
> 
> I will have a look at this later. I just saw that you pushed some
> branches adding SFP support to b53. ;-)

I would really add PHYLINK callbacks now what you have is fairly simple
to extract into separate functions doing the MAC configuration, and then
setting link up/down, that's pretty much all you need. Once you start
adding SFP/SFF support, things can get a bit more complicated
configuration wise.

> 
> The next step will be adding layer 2 offload. This is planned for the
> next patch series after this was merged. The switch uses internal VLANs
> for the offloading, so you configure a VLAN in the hardware and then add
> ports to it. I saw that multiple switches use this model, but converting
> the not VLAN aware layer 2 offloading to it looks a little bit strange,
> is there a good practice?
> 

Not VLAN aware layer 2 offload is actually quite common, the switch must
accept all frames in that case (not checking VID). L2 offload is really
about the following use case create a bridge, enslave ports of the
switch into it, and you should now have the switch forward from/to/
port0/port1 without this traffic going all the way to the CPU. If it
does, then there is no point having a switch in the first place :)

>> [snip]
>>
>>> +    if (priv->ds->dst->cpu_dp->index != priv->hw_info->cpu_port) {
>>> +        dev_err(dev, "wrong CPU port defined, HW only supports port:
>>> %i",
>>> +            priv->hw_info->cpu_port);
>>> +        err = -EINVAL;
>>> +        goto mdio_bus;
>>> +    }
>>
>> There are a number of switches (b53, qca8k, mt7530) that have this
>> requirement, we should probably be moving this check down into the core
>> DSA layer and allow either to continue but disable switch tagging, if it
>> was requested. Andrew what do you think?
> 
> As the CPU port is a special port many registers are only available for
> the normal front facing Ethernet ports and not for the CPU port so I
> have to make sure the correct port was selected as CPU port, otherwise
> the driver will write to the wrong register offsets.

OK, my comment was mostly for Andrew, this is not the first switch that
has specific requirements on which port of the switch is the
"CPU/management" port. So we should probably add some core functionality
within DSA to say "here are the ports that I can accept for management",
if the port you connected your switch to any other than those, then you
just lose tagging.
-- 
Florian

^ permalink raw reply

* Re: [PATCH v2 net-next 7/7] net: dsa: Add Lantiq / Intel DSA driver for vrx200
From: Hauke Mehrtens @ 2018-09-06 21:11 UTC (permalink / raw)
  To: Florian Fainelli, davem
  Cc: netdev, andrew, vivien.didelot, john, linux-mips, dev,
	hauke.mehrtens, devicetree
In-Reply-To: <eb5c0815-e80c-7fd7-a14a-ccc3f28a7c93@gmail.com>


[-- Attachment #1.1: Type: text/plain, Size: 4679 bytes --]

On 09/03/2018 09:54 PM, Florian Fainelli wrote:
> 
> 
> On 9/1/2018 5:05 AM, Hauke Mehrtens wrote:
>> This adds the DSA driver for the GSWIP Switch found in the VRX200 SoC.
>> This switch is integrated in the DSL SoC, this SoC uses a GSWIP version
>> 2.1, there are other SoCs using different versions of this IP block, but
>> this driver was only tested with the version found in the VRX200.
>> Currently only the basic features are implemented which will forward all
>> packages to the CPU and let the CPU do the forwarding. The hardware also
>> support Layer 2 offloading which is not yet implemented in this driver.
>>
>> The GPHY FW loaded is now done by this driver and not any more by the
>> separate driver in drivers/soc/lantiq/gphy.c, I will remove this driver
>> is a separate patch. to make use of the GPHY this switch driver is
>> needed anyway. Other SoCs have more embedded GPHYs so this driver should
>> support a variable number of GPHYs. After the firmware was loaded the
>> GPHY can be probed on the MDIO bus and it behaves like an external GPHY,
>> without the firmware it can not be probed on the MDIO bus.
>>
>> Currently this depends on SOC_TYPE_XWAY because the SoC revision
>> detection function ltq_soc_type() is used to load the correct GPHY
>> firmware on the VRX200 SoCs.
>>
>> The clock names in the sysctrl.c file have to be changed because the
>> clocks are now used by a different driver. This should be cleaned up and
>> a real common clock driver should provide the clocks instead.
>>
>> Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
>> ---
> 
> Looks great, just a few suggestions below
> 
> [snip]
> 
>> +static void gswip_adjust_link(struct dsa_switch *ds, int port,
>> +                  struct phy_device *phydev)
>> +{
>> +    struct gswip_priv *priv = ds->priv;
>> +    u16 macconf = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK;
>> +    u16 miirate = 0;
>> +    u16 miimode;
>> +    u16 lcl_adv = 0, rmt_adv = 0;
>> +    u8 flowctrl;
>> +
>> +    /* do not run this for the CPU port */
>> +    if (dsa_is_cpu_port(ds, port))
>> +        return;
> 
> Typically we expect the adjust_link callback to run for fixed link
> ports, that is inter-switch links (between switches) or between the CPU
> port and the Ethernet MAC attached to the switch. Here you are running
> this for the user facing ports (IIRC), which should really not be
> necessary, most Ethernet switches will be able to look at their built-in
> PHY's state and configure the switch's port automatically. Maybe this is
> not possible here because you had to disable polling?

I deactivated the PHY auto polling, I can activate it again. Some PHYs
could also be external on the same MDIO bus as the internal PHYs.

The CPU facing fixed link is a special MAC in the switch, at least in
this version of the switch IP which is embedded in the networking SoCs.
The MAC is more or less integrated in the switch and the driver can not
configure the link between the MAC and the switch.

> Can you consider implementing PHYLINK operations which would make the
> driver more future proof, should you consider newer generations of
> switches that support 10G PHY, SGMII, SFP/SFF and so on?

I will have a look at this later. I just saw that you pushed some
branches adding SFP support to b53. ;-)

The next step will be adding layer 2 offload. This is planned for the
next patch series after this was merged. The switch uses internal VLANs
for the offloading, so you configure a VLAN in the hardware and then add
ports to it. I saw that multiple switches use this model, but converting
the not VLAN aware layer 2 offloading to it looks a little bit strange,
is there a good practice?

> [snip]
> 
>> +    if (priv->ds->dst->cpu_dp->index != priv->hw_info->cpu_port) {
>> +        dev_err(dev, "wrong CPU port defined, HW only supports port:
>> %i",
>> +            priv->hw_info->cpu_port);
>> +        err = -EINVAL;
>> +        goto mdio_bus;
>> +    }
> 
> There are a number of switches (b53, qca8k, mt7530) that have this
> requirement, we should probably be moving this check down into the core
> DSA layer and allow either to continue but disable switch tagging, if it
> was requested. Andrew what do you think?

As the CPU port is a special port many registers are only available for
the normal front facing Ethernet ports and not for the CPU port so I
have to make sure the correct port was selected as CPU port, otherwise
the driver will write to the wrong register offsets.

Hauke


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* [RFC PATCH net-next] liquidio: lio_fetch_vf_stats() can be static
From: kbuild test robot @ 2018-09-07  1:41 UTC (permalink / raw)
  To: Weilin Chang
  Cc: kbuild-all, netdev, Felix Manlunas, Derek Chickles,
	Satanand Burla, Raghu Vatsavayi, linux-kernel
In-Reply-To: <201809070936.Xhh3hpBt%fengguang.wu@intel.com>


Fixes: 488752220b4a ("liquidio: Add spoof checking on a VF MAC address")
Signed-off-by: kbuild test robot <fengguang.wu@intel.com>
---
 lio_core.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 0284204..2122430 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -1357,7 +1357,7 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev,
 	}
 }
 
-int lio_fetch_vf_stats(struct lio *lio)
+static int lio_fetch_vf_stats(struct lio *lio)
 {
 	struct octeon_device *oct_dev = lio->oct_dev;
 	struct octeon_soft_command *sc;

^ permalink raw reply related

* [net-next:master 73/74] drivers/net/ethernet/cavium/liquidio/lio_core.c:1360:5: sparse: symbol 'lio_fetch_vf_stats' was not declared. Should it be static?
From: kbuild test robot @ 2018-09-07  1:41 UTC (permalink / raw)
  To: Weilin Chang
  Cc: kbuild-all, netdev, Felix Manlunas, Derek Chickles,
	Satanand Burla, Raghu Vatsavayi, linux-kernel

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git master
head:   ddc4d236dc71b255ff4cb8394f5fce2739a1d138
commit: 488752220b4a73ae131ca3e7c0c83b9f1bf092e4 [73/74] liquidio: Add spoof checking on a VF MAC address
reproduce:
        # apt-get install sparse
        git checkout 488752220b4a73ae131ca3e7c0c83b9f1bf092e4
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   include/linux/slab.h:631:13: sparse: undefined identifier '__builtin_mul_overflow'
   include/linux/slab.h:631:13: sparse: not a function <noident>
   include/linux/slab.h:631:13: sparse: not a function <noident>
   include/linux/slab.h:631:13: sparse: not a function <noident>
   include/linux/slab.h:631:13: sparse: not a function <noident>
   include/linux/slab.h:631:13: sparse: not a function <noident>
   include/linux/slab.h:631:13: sparse: not a function <noident>
>> drivers/net/ethernet/cavium/liquidio/lio_core.c:1360:5: sparse: symbol 'lio_fetch_vf_stats' was not declared. Should it be static?
   include/linux/slab.h:631:13: sparse: call with no type!

Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply

* [PATCH iproute2 v2] tc/mqprio: Print extra info on invalid args.
From: Caleb Raitto @ 2018-09-06 21:01 UTC (permalink / raw)
  To: stephen, netdev; +Cc: jhs, xiyou.wangcong, jiri, Caleb Raitto

From: Caleb Raitto <caraitto@google.com>

Print the name of the argument that wasn't understood.

Signed-off-by: Caleb Raitto <caraitto@google.com>
---
 tc/q_mqprio.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tc/q_mqprio.c b/tc/q_mqprio.c
index 89b46002..7cd18ae1 100644
--- a/tc/q_mqprio.c
+++ b/tc/q_mqprio.c
@@ -167,8 +167,7 @@ static int mqprio_parse_opt(struct qdisc_util *qu, int argc,
 			explain();
 			return -1;
 		} else {
-			fprintf(stderr, "Unknown argument\n");
-			return -1;
+			invarg("unknown argument", *argv);
 		}
 		argc--; argv++;
 	}
-- 
2.19.0.rc2.392.g5ba43deb5a-goog

^ permalink raw reply related

* Re: Re: [PATCH net-next v2 0/5] virtio: support packed ring
From: Tiwei Bie @ 2018-09-07  1:22 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: jasowang, virtualization, linux-kernel, netdev, virtio-dev, wexu,
	jfreimann
In-Reply-To: <20180827170005-mutt-send-email-mst@kernel.org>

On Mon, Aug 27, 2018 at 05:00:40PM +0300, Michael S. Tsirkin wrote:
> Are there still plans to test the performance with vost pmd?
> vhost doesn't seem to show a performance gain ...
> 

I tried some performance tests with vhost PMD. In guest, the
XDP program will return XDP_DROP directly. And in host, testpmd
will do txonly fwd.

When burst size is 1 and packet size is 64 in testpmd and
testpmd needs to iterate 5 Tx queues (but only the first two
queues are enabled) to prepare and inject packets, I got ~12%
performance boost (5.7Mpps -> 6.4Mpps). And if the vhost PMD
is faster (e.g. just need to iterate the first two queues to
prepare and inject packets), then I got similar performance
for both rings (~9.9Mpps) (packed ring's performance can be
lower, because it's more complicated in driver.)

I think packed ring makes vhost PMD faster, but it doesn't make
the driver faster. In packed ring, the ring is simplified, and
the handling of the ring in vhost (device) is also simplified,
but things are not simplified in driver, e.g. although there is
no desc table in the virtqueue anymore, driver still needs to
maintain a private desc state table (which is still managed as
a list in this patch set) to support the out-of-order desc
processing in vhost (device).

I think this patch set is mainly to make the driver have a full
functional support for the packed ring, which makes it possible
to leverage the packed ring feature in vhost (device). But I'm
not sure whether there is any other better idea, I'd like to
hear your thoughts. Thanks!


> 
> On Wed, Jul 11, 2018 at 10:27:06AM +0800, Tiwei Bie wrote:
> > Hello everyone,
> > 
> > This patch set implements packed ring support in virtio driver.
> > 
> > Some functional tests have been done with Jason's
> > packed ring implementation in vhost:
> > 
> > https://lkml.org/lkml/2018/7/3/33
> > 
> > Both of ping and netperf worked as expected.
> > 
> > v1 -> v2:
> > - Use READ_ONCE() to read event off_wrap and flags together (Jason);
> > - Add comments related to ccw (Jason);
> > 
> > RFC (v6) -> v1:
> > - Avoid extra virtio_wmb() in virtqueue_enable_cb_delayed_packed()
> >   when event idx is off (Jason);
> > - Fix bufs calculation in virtqueue_enable_cb_delayed_packed() (Jason);
> > - Test the state of the desc at used_idx instead of last_used_idx
> >   in virtqueue_enable_cb_delayed_packed() (Jason);
> > - Save wrap counter (as part of queue state) in the return value
> >   of virtqueue_enable_cb_prepare_packed();
> > - Refine the packed ring definitions in uapi;
> > - Rebase on the net-next tree;
> > 
> > RFC v5 -> RFC v6:
> > - Avoid tracking addr/len/flags when DMA API isn't used (MST/Jason);
> > - Define wrap counter as bool (Jason);
> > - Use ALIGN() in vring_init_packed() (Jason);
> > - Avoid using pointer to track `next` in detach_buf_packed() (Jason);
> > - Add comments for barriers (Jason);
> > - Don't enable RING_PACKED on ccw for now (noticed by Jason);
> > - Refine the memory barrier in virtqueue_poll();
> > - Add a missing memory barrier in virtqueue_enable_cb_delayed_packed();
> > - Remove the hacks in virtqueue_enable_cb_prepare_packed();
> > 
> > RFC v4 -> RFC v5:
> > - Save DMA addr, etc in desc state (Jason);
> > - Track used wrap counter;
> > 
> > RFC v3 -> RFC v4:
> > - Make ID allocation support out-of-order (Jason);
> > - Various fixes for EVENT_IDX support;
> > 
> > RFC v2 -> RFC v3:
> > - Split into small patches (Jason);
> > - Add helper virtqueue_use_indirect() (Jason);
> > - Just set id for the last descriptor of a list (Jason);
> > - Calculate the prev in virtqueue_add_packed() (Jason);
> > - Fix/improve desc suppression code (Jason/MST);
> > - Refine the code layout for XXX_split/packed and wrappers (MST);
> > - Fix the comments and API in uapi (MST);
> > - Remove the BUG_ON() for indirect (Jason);
> > - Some other refinements and bug fixes;
> > 
> > RFC v1 -> RFC v2:
> > - Add indirect descriptor support - compile test only;
> > - Add event suppression supprt - compile test only;
> > - Move vring_packed_init() out of uapi (Jason, MST);
> > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > - Avoid using '%' operator (Jason);
> > - Rename free_head -> next_avail_idx (Jason);
> > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > - Some other refinements and bug fixes;
> > 
> > Thanks!
> > 
> > Tiwei Bie (5):
> >   virtio: add packed ring definitions
> >   virtio_ring: support creating packed ring
> >   virtio_ring: add packed ring support
> >   virtio_ring: add event idx support in packed ring
> >   virtio_ring: enable packed ring
> > 
> >  drivers/s390/virtio/virtio_ccw.c   |   14 +
> >  drivers/virtio/virtio_ring.c       | 1365 ++++++++++++++++++++++------
> >  include/linux/virtio_ring.h        |    8 +-
> >  include/uapi/linux/virtio_config.h |    3 +
> >  include/uapi/linux/virtio_ring.h   |   43 +
> >  5 files changed, 1157 insertions(+), 276 deletions(-)
> > 
> > -- 
> > 2.18.0
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> 

^ permalink raw reply

* [PATCH 0/2] ARM: dts: mvebu: marvell,prestera
From: Chris Packham @ 2018-09-07  0:59 UTC (permalink / raw)
  To: robh+dt, gregory.clement
  Cc: jason, davem, andrew, sebastian.hesselbarth, devicetree,
	linux-arm-kernel, linux-kernel, netdev, Chris Packham

Define a generic compatible string so that drivers don't need to deal with the
specific variants.

Chris Packham (2):
  dt-bindings: marvell,prestera: Add common compatible string
  ARM: dts: mvebu: add "marvell,prestera" to PP nodes

 Documentation/devicetree/bindings/net/marvell,prestera.txt | 4 ++--
 arch/arm/boot/dts/armada-xp-98dx3236.dtsi                  | 2 +-
 arch/arm/boot/dts/armada-xp-98dx3336.dtsi                  | 2 +-
 arch/arm/boot/dts/armada-xp-98dx4251.dtsi                  | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

-- 
2.18.0

^ permalink raw reply

* Re: [PATCH net-next] net: sched: change tcf_del_walker() to use concurrent-safe delete
From: Cong Wang @ 2018-09-06 19:58 UTC (permalink / raw)
  To: Vlad Buslov
  Cc: Linux Kernel Network Developers, Jamal Hadi Salim, Jiri Pirko,
	David Miller
In-Reply-To: <vbfworyx4hr.fsf@reg-r-vrt-018-180.mtr.labs.mlnx>

On Thu, Sep 6, 2018 at 4:14 AM Vlad Buslov <vladbu@mellanox.com> wrote:
> > Isn't a concurrent tcf_idr_check_alloc() able to livelock here with
> > your change?
> >
> > idr_for_each_entry_ul{
> >    spin_lock(&idrinfo->lock);
> >    idr_remove();
> >    spin_unlock(&idrinfo->lock);
> >       // tcf_idr_check_alloc() jumps in,
> >      // allocates next ID which can be found
> >       // by idr_get_next_ul()
> > } // the whole loop goes _literately_ infinite...
>
> idr_for_each_entry_ul traverses idr entries with ascending order of
> identifiers, so infinite livelock like this is not possible because it
> never goes back to newly added entries with id<current_id.

I said "literately infinite", it could go from 1 to UINT_MAX,
sufficient to prove my point of livelock.


> >
> > Also, idr_for_each_entry_ul() is supposed to be protected either
> > by RCU or idrinfo->lock, no? With your change or without any change,
> > it doesn't even have any lock after removing RTNL?
>
> After reading this comment I checked actual idr implementation and I
> think you are right. Even though idr_for_each_entry_ul() macro (and
> function idr_get_next_ul() that it uses to iterate over idr entries)
> doesn't specify any locking requirements in comment description (that is
> why this patch doesn't use any), its implementation seems to require
> external synchronization.

Yeah, it is also a reader, so either a reader lock like RCU or a writer lock
like idrinfo->lock.

>
> You suggest I should just hold idrinfo->lock for whole del_walker loop
> duration, or play nicely with potential concurrent users and
> take/release it per action?

My suggestion is pretty clear, you just missed it, let me copy-n-paste:

With what I suggest:

spin_lock(&idrinfo->lock);
idr_for_each_entry_ul{
   idr_remove();
}
spin_unlock(&idrinfo->lock);

^ permalink raw reply

* Re: linux-next: build failure after merge of the bpf-next tree
From: Alexei Starovoitov @ 2018-09-07  0:22 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: Daniel Borkmann, Alexei Starovoitov, Networking,
	Linux-Next Mailing List, Linux Kernel Mailing List,
	Björn Töpel
In-Reply-To: <20180907101923.4d2a85c6@canb.auug.org.au>

On Fri, Sep 07, 2018 at 10:19:23AM +1000, Stephen Rothwell wrote:
> Hi all,
> 
> After merging the bpf-next tree, today's linux-next build (powerpc
> ppc64_defconfig) failed like this:
> 
> ERROR: ".xsk_reuseq_swap" [drivers/net/ethernet/intel/i40e/i40e.ko] undefined!
> ERROR: ".xsk_reuseq_free" [drivers/net/ethernet/intel/i40e/i40e.ko] undefined!
> ERROR: ".xsk_reuseq_prepare" [drivers/net/ethernet/intel/i40e/i40e.ko] undefined
> 
> Caused by commit
> 
>   9654bd10da60 ("i40e: clean zero-copy XDP Rx ring on shutdown/reset")
> 
> CONFIG_XDP_SOCKETS is not set for this build.
> 
> I have used the version of the bfp-next tree from next-20180906 for today.

merge conflict and build error...
Bjorn, I'm thinking to toss the patches out of bpf-next and reapply
cleaned up version of the patches...
what do you think?

^ permalink raw reply

* Re: [PATCH] tcp: really ignore MSG_ZEROCOPY if no SO_ZEROCOPY
From: Willem de Bruijn @ 2018-09-06 19:44 UTC (permalink / raw)
  To: vincent.whitchurch
  Cc: David Miller, Network Development, Willem de Bruijn, rabinv
In-Reply-To: <20180906135459.15529-1-vincent.whitchurch@axis.com>

On Thu, Sep 6, 2018 at 9:58 AM Vincent Whitchurch
<vincent.whitchurch@axis.com> wrote:
>
> According to the documentation in msg_zerocopy.rst, the SO_ZEROCOPY
> flag was introduced because send(2) ignores unknown message flags and
> any legacy application which was accidentally passing the equivalent of
> MSG_ZEROCOPY earlier should not see any new behaviour.
>
> Before commit f214f915e7db ("tcp: enable MSG_ZEROCOPY"), a send(2) call
> which passed the equivalent of MSG_ZEROCOPY without setting SO_ZEROCOPY
> would succeed.  However, after that commit, it fails with -ENOBUFS.  So
> it appears that the SO_ZEROCOPY flag fails to fulfill its intended
> purpose.  Fix it.
>
> Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY")
>
> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>

Acked-by: Willem de Bruijn <willemb@google.com>

Good catch, thanks for fixing this.

Please remember to mark patches with PATCH net

^ permalink raw reply

* Re: linux-next: build failure after merge of the net-next tree
From: Stephen Rothwell @ 2018-09-07  0:20 UTC (permalink / raw)
  To: David Miller, Networking
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List, Jacob Keller,
	Jeff Kirsher, Andrew Bowers
In-Reply-To: <20180903094702.3e32d8f5@canb.auug.org.au>

[-- Attachment #1: Type: text/plain, Size: 1503 bytes --]

Hi all,

On Mon, 3 Sep 2018 09:47:02 +1000 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
>
> After merging the net-next tree, today's linux-next build (powerpc
> ppc64_defconfig) failed like this:
> 
> In file included from drivers/net/ethernet/intel/i40e/i40e_ethtool.c:9:
> drivers/net/ethernet/intel/i40e/i40e_ethtool.c: In function '__i40e_add_stat_strings':
> drivers/net/ethernet/intel/i40e/i40e_ethtool_stats.h:193:20: error: function '__i40e_add_stat_strings' can never be inlined because it uses variable argument lists
>  static inline void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[],
>                     ^~~~~~~~~~~~~~~~~~~~~~~
> 
> Caused by commit
> 
>   8fd75c58a09a ("i40e: move ethtool stats boiler plate code to i40e_ethtool_stats.h")
> 
> It is not clear this patch has any value anyway as the moved functions
> are only used in the file they were moved from.
> 
> I reverted that commit for today.
> 
> The same problem would exist in drivers/net/ethernet/intel/i40evf (where
> a lot of code is duplicated from drivers/net/ethernet/intel/i40e) except
> that this function is not declared inline there.
> Luckily, i40e_ethtool_stats.h is only included my one file
> drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c, otherwise there
> would be multiple copies of __i40e_add_stat_strings().
> 
> Surely there is some scope for factoring out some common code between
> these two drivers?

Ping?

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* linux-next: build failure after merge of the bpf-next tree
From: Stephen Rothwell @ 2018-09-07  0:19 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov, Networking
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List,
	Björn Töpel

[-- Attachment #1: Type: text/plain, Size: 618 bytes --]

Hi all,

After merging the bpf-next tree, today's linux-next build (powerpc
ppc64_defconfig) failed like this:

ERROR: ".xsk_reuseq_swap" [drivers/net/ethernet/intel/i40e/i40e.ko] undefined!
ERROR: ".xsk_reuseq_free" [drivers/net/ethernet/intel/i40e/i40e.ko] undefined!
ERROR: ".xsk_reuseq_prepare" [drivers/net/ethernet/intel/i40e/i40e.ko] undefined

Caused by commit

  9654bd10da60 ("i40e: clean zero-copy XDP Rx ring on shutdown/reset")

CONFIG_XDP_SOCKETS is not set for this build.

I have used the version of the bfp-next tree from next-20180906 for today.



-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* linux-next: manual merge of the bpf-next tree with a previous revert
From: Stephen Rothwell @ 2018-09-07  0:12 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov, Networking
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List,
	Björn Töpel, David Miller, Jacob Keller, Jeff Kirsher,
	Andrew Bowers

[-- Attachment #1: Type: text/plain, Size: 2054 bytes --]

Hi all,

Today's linux-next merge of the bpf-next tree got a conflict in:

  drivers/net/ethernet/intel/i40e/i40e_ethtool.c

between commit:

  39b042e0b347 ("Merge remote-tracking branch 'net-next/master'")
(this is really my revert of
  8fd75c58a09a ("i40e: move ethtool stats boiler plate code to i40e_ethtool_stats.h")
due to a build failure
)

and commit:

  b5061a9e8785 ("i40e: disallow changing the number of descriptors when AF_XDP is on")

from the bpf-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 235012b3bd42,3cd2c88c72f8..000000000000
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@@ -5,26 -5,9 +5,27 @@@
  
  #include "i40e.h"
  #include "i40e_diag.h"
+ #include "i40e_txrx_common.h"
 -#include "i40e_ethtool_stats.h"
  
 +struct i40e_stats {
 +	/* The stat_string is expected to be a format string formatted using
 +	 * vsnprintf by i40e_add_stat_strings. Every member of a stats array
 +	 * should use the same format specifiers as they will be formatted
 +	 * using the same variadic arguments.
 +	 */
 +	char stat_string[ETH_GSTRING_LEN];
 +	int sizeof_stat;
 +	int stat_offset;
 +};
 +
 +#define I40E_STAT(_type, _name, _stat) { \
 +	.stat_string = _name, \
 +	.sizeof_stat = FIELD_SIZEOF(_type, _stat), \
 +	.stat_offset = offsetof(_type, _stat) \
 +}
 +
 +#define I40E_NETDEV_STAT(_net_stat) \
 +	I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat)
  #define I40E_PF_STAT(_name, _stat) \
  	I40E_STAT(struct i40e_pf, _name, _stat)
  #define I40E_VSI_STAT(_name, _stat) \

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH net] ip: frags: fix crash in ip_do_fragment()
From: Taehee Yoo @ 2018-09-06 18:48 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Peter Oskolkov, netdev, Pablo Neira Ayuso,
	Florian Westphal
In-Reply-To: <CANn89iJfL_zafpBoFBoJKHRNxHogx_8V2BEEfkLW3DZFGCBe8w@mail.gmail.com>

2018-09-07 3:23 GMT+09:00 Eric Dumazet <edumazet@google.com>:
> On Thu, Sep 6, 2018 at 11:06 AM Eric Dumazet <edumazet@google.com> wrote:
>>
>> On Thu, Sep 6, 2018 at 10:51 AM Taehee Yoo <ap420073@gmail.com> wrote:
>> >
>> > A kernel crash occurrs when defragmented packet is fragmented
>> > in ip_do_fragment().
>> > In defragment routine, skb_orphan() is called and
>> > skb->ip_defrag_offset is set. but skb->sk and
>> > skb->ip_defrag_offset are same union member. so that
>> > frag->sk is not NULL.
>> > Hence crash occurrs in skb->sk check routine in ip_do_fragment() when
>> > defragmented packet is fragmented.
>>
>> Have you tested this patch ?
>>
>> Moving back ip_defrag_offset is conflicting with the rbnode !
>>
>> A more correct fix would be to properly clear skb->sk at reassembly.
>
> Something like that :
>
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index 88281fbce88ce8f1062b99594665766c2a5f5b74..e7227128df2c8fd54727c234f76043133809bd1e
> 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -599,6 +599,7 @@ static int ip_frag_reasm(struct ipq *qp, struct
> sk_buff *skb,
>                         nextp = &fp->next;
>                         fp->prev = NULL;
>                         memset(&fp->rbnode, 0, sizeof(fp->rbnode));
> +                       fp->sk = NULL;
>                         head->data_len += fp->len;
>                         head->len += fp->len;
>                         if (head->ip_summed != fp->ip_summed)

Hi Eric!

Oh I'm sorry, I realized that ip_defrag_offset would be conflicting
with the rbnode just now
So, this patch should be dropped.
And I will make v2 patch regard you suggested!

Thank you for review and suggestion!

^ permalink raw reply

* Re: [PATCH net] ip: frags: fix crash in ip_do_fragment()
From: Eric Dumazet @ 2018-09-06 18:23 UTC (permalink / raw)
  To: ap420073
  Cc: David Miller, Peter Oskolkov, netdev, Pablo Neira Ayuso,
	Florian Westphal
In-Reply-To: <CANn89iJENha1FiqA_pNpaXu-+vryHjw+6a-fR4Qc3-WmQsrXdQ@mail.gmail.com>

On Thu, Sep 6, 2018 at 11:06 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Sep 6, 2018 at 10:51 AM Taehee Yoo <ap420073@gmail.com> wrote:
> >
> > A kernel crash occurrs when defragmented packet is fragmented
> > in ip_do_fragment().
> > In defragment routine, skb_orphan() is called and
> > skb->ip_defrag_offset is set. but skb->sk and
> > skb->ip_defrag_offset are same union member. so that
> > frag->sk is not NULL.
> > Hence crash occurrs in skb->sk check routine in ip_do_fragment() when
> > defragmented packet is fragmented.
>
> Have you tested this patch ?
>
> Moving back ip_defrag_offset is conflicting with the rbnode !
>
> A more correct fix would be to properly clear skb->sk at reassembly.

Something like that :

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 88281fbce88ce8f1062b99594665766c2a5f5b74..e7227128df2c8fd54727c234f76043133809bd1e
100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -599,6 +599,7 @@ static int ip_frag_reasm(struct ipq *qp, struct
sk_buff *skb,
                        nextp = &fp->next;
                        fp->prev = NULL;
                        memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+                       fp->sk = NULL;
                        head->data_len += fp->len;
                        head->len += fp->len;
                        if (head->ip_summed != fp->ip_summed)

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox