Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: pull-request: bpf 2018-04-25
From: Daniel Borkmann @ 2018-04-26  4:42 UTC (permalink / raw)
  To: David Miller; +Cc: ast, netdev
In-Reply-To: <20180425.230532.893947763052616175.davem@davemloft.net>

On 04/26/2018 05:05 AM, David Miller wrote:
> From: Daniel Borkmann <daniel@iogearbox.net>
> Date: Thu, 26 Apr 2018 00:04:50 +0200
> 
>> The following pull-request contains BPF updates for your *net* tree.
>  ...
>> Please consider pulling these changes from:
>>
>>   git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
> 
> Pulled, thanks Daniel.
> 
>> Would be great if you have a chance to merge net into net-next after
>> that since sockmap fixes are needed in bpf-next later on to avoid
>> ugly merge conflicts.
> 
> Done.

Awesome, thanks!

^ permalink raw reply

* [PATCH v2] net/mlx4_en: fix potential use-after-free with dma_unmap_page
From: Sarah Newman @ 2018-04-26  4:00 UTC (permalink / raw)
  To: tariqt, yishaih; +Cc: netdev, Sarah Newman
In-Reply-To: <c386e907-51fa-f9e3-2207-09e137b8bcfa@mellanox.com>

When swiotlb is in use, calling dma_unmap_page means that
the original page mapped with dma_map_page must still be valid
as swiotlb will copy data from its internal cache back to the
originally requested DMA location. When GRO is enabled,
all references to the original frag may be put before
mlx4_en_free_frag is called, meaning the page has been freed
before the call to dma_unmap_page in mlx4_en_free_frag.

To fix, unmap the page as soon as possible.

This can be trivially detected by doing the following:

Compile the kernel with DEBUG_PAGEALLOC
Run the kernel as a Xen Dom0
Leave GRO enabled on the interface
Run a 10 second or more test with iperf over the interface.

Signed-off-by: Sarah Newman <srn@prgmr.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 32 +++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 844f5ad..abe2b43 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -142,16 +142,17 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
 			      struct mlx4_en_rx_alloc *frags,
 			      int i)
 {
-	const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
-	u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
-
-
-	if (next_frag_end > frags[i].page_size)
-		dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
-			       frag_info->dma_dir);
+	if (frags[i].page) {
+		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
+		u32 next_frag_end = frags[i].page_offset +
+				2 * frag_info->frag_stride;
 
-	if (frags[i].page)
+		if (next_frag_end > frags[i].page_size) {
+			dma_unmap_page(priv->ddev, frags[i].dma,
+				       frags[i].page_size, frag_info->dma_dir);
+		}
 		put_page(frags[i].page);
+	}
 }
 
 static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
@@ -586,21 +587,28 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 				    int length)
 {
 	struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
-	struct mlx4_en_frag_info *frag_info;
 	int nr;
 	dma_addr_t dma;
 
 	/* Collect used fragments while replacing them in the HW descriptors */
 	for (nr = 0; nr < priv->num_frags; nr++) {
-		frag_info = &priv->frag_info[nr];
+		struct mlx4_en_frag_info *frag_info = &priv->frag_info[nr];
+		u32 next_frag_end = frags[nr].page_offset +
+				2 * frag_info->frag_stride;
+
 		if (length <= frag_info->frag_prefix_size)
 			break;
 		if (unlikely(!frags[nr].page))
 			goto fail;
 
 		dma = be64_to_cpu(rx_desc->data[nr].addr);
-		dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size,
-					DMA_FROM_DEVICE);
+		if (next_frag_end > frags[nr].page_size)
+			dma_unmap_page(priv->ddev, frags[nr].dma,
+				       frags[nr].page_size, frag_info->dma_dir);
+		else
+			dma_sync_single_for_cpu(priv->ddev, dma,
+						frag_info->frag_size,
+						DMA_FROM_DEVICE);
 
 		/* Save page reference in skb */
 		__skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page);
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH bpf-next 13/15] xsk: support for Tx
From: Björn Töpel @ 2018-04-26  4:02 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Magnus Karlsson, Karlsson, Magnus, Alexander Duyck,
	Alexander Duyck, John Fastabend, Alexei Starovoitov,
	Jesper Dangaard Brouer, Daniel Borkmann, Michael S. Tsirkin,
	Network Development, michael.lundkvist, Brandeburg, Jesse,
	Singhai, Anjali, Zhang, Qi Z
In-Reply-To: <CAF=yD-JDvEHrWLx7w6zUpct0KbNvWUofG_LRuFFVbozCb-UGkA@mail.gmail.com>

2018-04-25 21:00 GMT+02:00 Willem de Bruijn <willemdebruijn.kernel@gmail.com>:
[...]
>>> static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
>>> +                                             struct xdp_desc *desc)
>>> +{
>>> +       struct xdp_rxtx_ring *ring;
>>> +
>>> +       if (q->cons_tail == q->cons_head) {
>>> +               WRITE_ONCE(q->ring->consumer, q->cons_tail);
>>> +               q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
>>> +
>>> +               /* Order consumer and data */
>>> +               smp_rmb();
>>> +
>>> +               return xskq_validate_desc(q, desc);
>>> +       }
>>> +
>>> +       ring = (struct xdp_rxtx_ring *)q->ring;
>>> +       *desc = ring->desc[q->cons_tail & q->ring_mask];
>>> +       return desc;
>>>
>>> This only validates descriptors if taking the branch.
>>
>> Yes, that is because we only want to validate the descriptors once
>> even if we call this function multiple times for the same entry.
>
> Then I am probably misreading this function. But isn't head increased
> by up to RX_BATCH_SIZE frames at once. If so, then for many frames
> the branch is not taken.

You're not misreading it! :-) The head is indeed increased, but only
the tail descriptor is validated in that function. Later in the
xskq_discard_desc function when the tail is moved, the next descriptor
is validated. So, the peek function will always return a validated
descriptor, but the validation can be done in either peek or discard.


Björn

^ permalink raw reply

* Re: [PATCH net-next v2 2/5] bpf: Add IPv6 Segment Routing helpers
From: kbuild test robot @ 2018-04-26  3:54 UTC (permalink / raw)
  To: Mathieu Xhonneux; +Cc: kbuild-all, netdev, dlebrun, alexei.starovoitov
In-Reply-To: <1d9d533b5d2640fe958d599ac0944132c3b7d61b.1524591163.git.m.xhonneux@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1680 bytes --]

Hi Mathieu,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Mathieu-Xhonneux/ipv6-sr-introduce-seg6local-End-BPF-action/20180426-082209
config: microblaze-mmu_defconfig (attached as .config)
compiler: microblaze-linux-gcc (GCC) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=microblaze 

All errors (new ones prefixed by >>):

   In file included from net/core/filter.c:63:0:
   include/net/seg6.h: In function 'seg6_pernet':
>> include/net/seg6.h:52:14: error: 'struct net' has no member named 'ipv6'; did you mean 'ipv4'?
     return net->ipv6.seg6_data;
                 ^~~~
                 ipv4

vim +52 include/net/seg6.h

915d7e5e David Lebrun 2016-11-08  49  
915d7e5e David Lebrun 2016-11-08  50  static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
915d7e5e David Lebrun 2016-11-08  51  {
915d7e5e David Lebrun 2016-11-08 @52  	return net->ipv6.seg6_data;
915d7e5e David Lebrun 2016-11-08  53  }
915d7e5e David Lebrun 2016-11-08  54  

:::::: The code at line 52 was first introduced by commit
:::::: 915d7e5e5930b4f01d0971d93b9b25ed17d221aa ipv6: sr: add code base for control plane support of SR-IPv6

:::::: TO: David Lebrun <david.lebrun@uclouvain.be>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 12282 bytes --]

^ permalink raw reply

* Re: [PATCH net-next] neighbour: support for NTF_EXT_LEARNED flag
From: Roopa Prabhu @ 2018-04-26  3:52 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Nikolay Aleksandrov, David Ahern
In-Reply-To: <20180425.132058.208675778907308410.davem@davemloft.net>

On Wed, Apr 25, 2018 at 10:20 AM, David Miller <davem@davemloft.net> wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> Date: Tue, 24 Apr 2018 13:49:34 -0700
>
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This patch extends NTF_EXT_LEARNED support to the neighbour system.
>> Example use-case: An Ethernet VPN implementation (eg in FRR routing suite)
>> can use this flag to add dynamic reachable external neigh entires
>> learned via control plane. The use of neigh NTF_EXT_LEARNED in this
>> patch is consistent with its use with bridge and vxlan fdb entries.
>>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>
> No objection to the patch or the facility, so applied, thanks.

Thanks!

>
> What exactly is the name of this VPN technology in the FRR routing
> suite?

Its "Ethernet VPN" with BGP based control plane.

https://github.com/FRRouting/frr/wiki/Frr-3.0-%E2%86%92-4.0

reference RFC's:
https://tools.ietf.org/html/rfc7432  : BGP MPLS-Based Ethernet VPN
https://tools.ietf.org/html/draft-ietf-bess-evpn-overlay-07
(describes how rfc7432 can be used as an Network Virtualization
Overlay (NVO) solution: eg with vxlan).

I also talked about it in my netdev2.2 tutorial:
https://www.netdevconf.org/2.2/slides/prabhu-linuxbridge-tutorial.pdf
(slide 60)

Found this blog by Vincent which describes it well:
https://vincent.bernat.im/en/blog/2017-vxlan-bgp-evpn

For the context of this patch:
Neighbor reachability information is exchanged via BGP. Remote
neighbors learnt via
BGP are installed in the kernel with NTF_EXT_LEARNED to indicate that
they are external neighbor entries.
FRR BGP also installs vxlan and bridge remote fdb entries with the
same flag. Basically replaces flood and learn
with control plane learning via BGP. Remote neighbor entries are also
used for arp/nd proxy.

^ permalink raw reply

* Re: [PATCH] fault-injection: reorder config entries
From: Randy Dunlap @ 2018-04-26  3:21 UTC (permalink / raw)
  To: Mikulas Patocka, Michal Hocko
  Cc: Matthew Wilcox, David Miller, Andrew Morton, linux-mm,
	eric.dumazet, edumazet, netdev, linux-kernel, mst, jasowang,
	virtualization, dm-devel, Vlastimil Babka
In-Reply-To: <alpine.LRH.2.02.1804251601160.30569@file01.intranet.prod.int.rdu2.redhat.com>

On 04/25/2018 01:02 PM, Mikulas Patocka wrote:
> This patch reorders Kconfig entries, so that menuconfig displays proper 
> indentation.
> 
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>

Thanks.

> ---
>  lib/Kconfig.debug |   36 ++++++++++++++++++------------------
>  1 file changed, 18 insertions(+), 18 deletions(-)
> 
> Index: linux-2.6/lib/Kconfig.debug
> ===================================================================
> --- linux-2.6.orig/lib/Kconfig.debug	2018-04-16 21:08:36.000000000 +0200
> +++ linux-2.6/lib/Kconfig.debug	2018-04-25 15:56:16.000000000 +0200
> @@ -1503,6 +1503,10 @@ config NETDEV_NOTIFIER_ERROR_INJECT
>  
>  	  If unsure, say N.
>  
> +config FUNCTION_ERROR_INJECTION
> +	def_bool y
> +	depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
> +
>  config FAULT_INJECTION
>  	bool "Fault-injection framework"
>  	depends on DEBUG_KERNEL
> @@ -1510,10 +1514,6 @@ config FAULT_INJECTION
>  	  Provide fault-injection framework.
>  	  For more details, see Documentation/fault-injection/.
>  
> -config FUNCTION_ERROR_INJECTION
> -	def_bool y
> -	depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
> -
>  config FAILSLAB
>  	bool "Fault-injection capability for kmalloc"
>  	depends on FAULT_INJECTION
> @@ -1544,16 +1544,6 @@ config FAIL_IO_TIMEOUT
>  	  Only works with drivers that use the generic timeout handling,
>  	  for others it wont do anything.
>  
> -config FAIL_MMC_REQUEST
> -	bool "Fault-injection capability for MMC IO"
> -	depends on FAULT_INJECTION_DEBUG_FS && MMC
> -	help
> -	  Provide fault-injection capability for MMC IO.
> -	  This will make the mmc core return data errors. This is
> -	  useful to test the error handling in the mmc block device
> -	  and to test how the mmc host driver handles retries from
> -	  the block device.
> -
>  config FAIL_FUTEX
>  	bool "Fault-injection capability for futexes"
>  	select DEBUG_FS
> @@ -1561,6 +1551,12 @@ config FAIL_FUTEX
>  	help
>  	  Provide fault-injection capability for futexes.
>  
> +config FAULT_INJECTION_DEBUG_FS
> +	bool "Debugfs entries for fault-injection capabilities"
> +	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
> +	help
> +	  Enable configuration of fault-injection capabilities via debugfs.
> +
>  config FAIL_FUNCTION
>  	bool "Fault-injection capability for functions"
>  	depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
> @@ -1571,11 +1567,15 @@ config FAIL_FUNCTION
>  	  an error value and have to handle it. This is useful to test the
>  	  error handling in various subsystems.
>  
> -config FAULT_INJECTION_DEBUG_FS
> -	bool "Debugfs entries for fault-injection capabilities"
> -	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
> +config FAIL_MMC_REQUEST
> +	bool "Fault-injection capability for MMC IO"
> +	depends on FAULT_INJECTION_DEBUG_FS && MMC
>  	help
> -	  Enable configuration of fault-injection capabilities via debugfs.
> +	  Provide fault-injection capability for MMC IO.
> +	  This will make the mmc core return data errors. This is
> +	  useful to test the error handling in the mmc block device
> +	  and to test how the mmc host driver handles retries from
> +	  the block device.
>  
>  config FAULT_INJECTION_STACKTRACE_FILTER
>  	bool "stacktrace filter for fault-injection capabilities"
> 


-- 
~Randy

^ permalink raw reply

* [PATCH net-next] bridge: use hlist_entry_safe
From: YueHaibing @ 2018-04-26  3:07 UTC (permalink / raw)
  To: stephen, davem; +Cc: netdev, bridge, YueHaibing

Use hlist_entry_safe() instead of open-coding it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 net/bridge/br_forward.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index b4eed11..7a7fd67 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -274,8 +274,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 		struct net_bridge_port *port, *lport, *rport;
 
 		lport = p ? p->port : NULL;
-		rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
-			     NULL;
+		rport = hlist_entry_safe(rp, struct net_bridge_port, rlist);
 
 		if ((unsigned long)lport > (unsigned long)rport) {
 			port = lport;
-- 
2.7.0

^ permalink raw reply related

* Re: pull-request: bpf 2018-04-25
From: David Miller @ 2018-04-26  3:05 UTC (permalink / raw)
  To: daniel; +Cc: ast, netdev
In-Reply-To: <20180425220450.17024-1-daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 26 Apr 2018 00:04:50 +0200

> The following pull-request contains BPF updates for your *net* tree.
 ...
> Please consider pulling these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git

Pulled, thanks Daniel.

> Would be great if you have a chance to merge net into net-next after
> that since sockmap fixes are needed in bpf-next later on to avoid
> ugly merge conflicts.

Done.

^ permalink raw reply

* Re: [net-next 00/10][pull request] 1GbE Intel Wired LAN Driver Updates 2018-04-25
From: David Miller @ 2018-04-26  3:03 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, nhorman, sassmann, jogreene
In-Reply-To: <20180425182232.28935-1-jeffrey.t.kirsher@intel.com>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Wed, 25 Apr 2018 11:22:22 -0700

> This series enables some ethtool and tc-flower filters to be offloaded
> to igb-based network controllers. This is useful when the system
> configuration wants to steer kinds of traffic to a specific hardware
> queue for i210 devices only.
> 
> The first two patch in the series are bug fixes.
> 
> The basis of this series is to export the internal API used to
> configure address filters, so they can be used by ethtool, and
> extending the functionality so an source address can be handled.
> 
> Then, we enable the tc-flower offloading implementation to re-use the
> same infrastructure as ethtool, and storing them in the per-adapter
> "nfc" (Network Filter Config?) list. But for consistency, for
> destructive access they are separated, i.e. an filter added by
> tc-flower can only be removed by tc-flower, but ethtool can read them
> all.
> 
> Only support for VLAN Prio, Source and Destination MAC Address, and
> Ethertype is enabled for now.

Pulled, thanks Jeff.

^ permalink raw reply

* Re: [PATCH v7 net-next 4/4] netvsc: refactor notifier/event handling code to use the failover framework
From: Michael S. Tsirkin @ 2018-04-26  2:43 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Siwei Liu, Jiri Pirko, Sridhar Samudrala, David Miller, Netdev,
	virtualization, virtio-dev, Brandeburg, Jesse, Alexander Duyck,
	Jakub Kicinski, Jason Wang
In-Reply-To: <20180425171831.785f412b@xeon-e3>

On Wed, Apr 25, 2018 at 05:18:31PM -0700, Stephen Hemminger wrote:
> On Wed, 25 Apr 2018 15:57:57 -0700
> Siwei Liu <loseweigh@gmail.com> wrote:
> 
> > >
> > > I think ideally the infrastructure should suppport flexible matching of
> > > NICs - netvsc is already reported to be moving to some kind of serial
> > > address.
> > >  
> > As Stephen said, Hyper-V supports the serial UUID thing from day-one.
> > It's just the Linux netvsc guest driver itself does not leverage that
> > ID from the very beginging.
> > 
> > Regards,
> > -Siwei
> 
> I am working on that.  The problem is that it requires some messy work
> to go from VF netdevice back to PCI device and from there to the PCI hyperv
> host infrastructure to find the serial number.
> 
> I was hoping that the serial number would also match the concept of PCI Express
> device serial number. But that is a completely different ID :-( 
> The PCI-E serial number is a hardware serial number more like MAC address.
> The Hyper-V serial number is more like PCI slot value.

Asuming you mean the Device Serial Number Capability,
I did consider this, and
we could use that, changing the UUID to one matching the
PV device, but I'm not sure no drivers will get confused
suddenly seeing the UUID of another company there.

If we are going the UUID route, a better idea might be to
specify the UUID of the PCI port into which the PT device is
being hotplugged.  This is PCI-Express specific, but presumably
legacy PCI/PCI-X devices aren't common enough to bother about
this for now.


-- 
MST

^ permalink raw reply

* Re: [PATCH net-next 07/14] bnxt_en: Do not allow VF to read EEPEOM.
From: Michael Chan @ 2018-04-26  2:40 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: David Miller, Netdev
In-Reply-To: <20180426013850.GA1845@lunn.ch>

On Wed, Apr 25, 2018 at 6:38 PM, Andrew Lunn <andrew@lunn.ch> wrote:
> On Wed, Apr 25, 2018 at 08:40:50PM -0400, Michael Chan wrote:
>
> Hi Michael
>
> You have a typO in the Subject.
>

Thanks.  I will wait a day or so for any additional comments before
sending v2 to fix the typo.

^ permalink raw reply

* Re: [PATCH net-next v8 4/4] netvsc: refactor notifier/event handling code to use the failover framework
From: Michael S. Tsirkin @ 2018-04-26  2:30 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Sridhar Samudrala, davem, netdev, virtualization, virtio-dev,
	jesse.brandeburg, alexander.h.duyck, kubakici, jasowang,
	loseweigh, jiri, aaron.f.brown
In-Reply-To: <20180425170837.6520a577@xeon-e3>

On Wed, Apr 25, 2018 at 05:08:37PM -0700, Stephen Hemminger wrote:
> On Wed, 25 Apr 2018 16:59:28 -0700
> Sridhar Samudrala <sridhar.samudrala@intel.com> wrote:
> 
> > Use the registration/notification framework supported by the generic
> > failover infrastructure.
> > 
> > Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
> 
> NAK unless you prove this works on legacy distributions and with DPDK 18.05
> without modification.

It looks like it should work. What kind of proof are you looking for?

-- 
MST

^ permalink raw reply

* Re: [PATCH v7 net-next 4/4] netvsc: refactor notifier/event handling code to use the failover framework
From: Michael S. Tsirkin @ 2018-04-26  2:28 UTC (permalink / raw)
  To: Siwei Liu
  Cc: Stephen Hemminger, Jiri Pirko, Sridhar Samudrala, David Miller,
	Netdev, virtualization, virtio-dev, Brandeburg, Jesse,
	Alexander Duyck, Jakub Kicinski, Jason Wang
In-Reply-To: <CADGSJ20vck5V8JCoF0Tq9PWfBu7QYPDvg0yAZ_8Xkig7TKU7Lw@mail.gmail.com>

On Wed, Apr 25, 2018 at 03:57:57PM -0700, Siwei Liu wrote:
> On Wed, Apr 25, 2018 at 3:22 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Wed, Apr 25, 2018 at 02:38:57PM -0700, Siwei Liu wrote:
> >> On Mon, Apr 23, 2018 at 1:06 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> >> > On Mon, Apr 23, 2018 at 12:44:39PM -0700, Siwei Liu wrote:
> >> >> On Mon, Apr 23, 2018 at 10:56 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> >> >> > On Mon, Apr 23, 2018 at 10:44:40AM -0700, Stephen Hemminger wrote:
> >> >> >> On Mon, 23 Apr 2018 20:24:56 +0300
> >> >> >> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >> >> >>
> >> >> >> > On Mon, Apr 23, 2018 at 10:04:06AM -0700, Stephen Hemminger wrote:
> >> >> >> > > > >
> >> >> >> > > > >I will NAK patches to change to common code for netvsc especially the
> >> >> >> > > > >three device model.  MS worked hard with distro vendors to support transparent
> >> >> >> > > > >mode, ans we really can't have a new model; or do backport.
> >> >> >> > > > >
> >> >> >> > > > >Plus, DPDK is now dependent on existing model.
> >> >> >> > > >
> >> >> >> > > > Sorry, but nobody here cares about dpdk or other similar oddities.
> >> >> >> > >
> >> >> >> > > The network device model is a userspace API, and DPDK is a userspace application.
> >> >> >> >
> >> >> >> > It is userspace but are you sure dpdk is actually poking at netdevs?
> >> >> >> > AFAIK it's normally banging device registers directly.
> >> >> >> >
> >> >> >> > > You can't go breaking userspace even if you don't like the application.
> >> >> >> >
> >> >> >> > Could you please explain how is the proposed patchset breaking
> >> >> >> > userspace? Ignoring DPDK for now, I don't think it changes the userspace
> >> >> >> > API at all.
> >> >> >> >
> >> >> >>
> >> >> >> The DPDK has a device driver vdev_netvsc which scans the Linux network devices
> >> >> >> to look for Linux netvsc device and the paired VF device and setup the
> >> >> >> DPDK environment.  This setup creates a DPDK failsafe (bondingish) instance
> >> >> >> and sets up TAP support over the Linux netvsc device as well as the Mellanox
> >> >> >> VF device.
> >> >> >>
> >> >> >> So it depends on existing 2 device model. You can't go to a 3 device model
> >> >> >> or start hiding devices from userspace.
> >> >> >
> >> >> > Okay so how does the existing patch break that? IIUC does not go to
> >> >> > a 3 device model since netvsc calls failover_register directly.
> >> >> >
> >> >> >> Also, I am working on associating netvsc and VF device based on serial number
> >> >> >> rather than MAC address. The serial number is how Windows works now, and it makes
> >> >> >> sense for Linux and Windows to use the same mechanism if possible.
> >> >> >
> >> >> > Maybe we should support same for virtio ...
> >> >> > Which serial do you mean? From vpd?
> >> >> >
> >> >> > I guess you will want to keep supporting MAC for old hypervisors?
> >> >> >
> >> >> > It all seems like a reasonable thing to support in the generic core.
> >> >>
> >> >> That's the reason why I chose explicit identifier rather than rely on
> >> >> MAC address to bind/pair a device. MAC address can change. Even if it
> >> >> can't, malicious guest user can fake MAC address to skip binding.
> >> >>
> >> >> -Siwei
> >> >
> >> > Address should be sampled at device creation to prevent this
> >> > kind of hack. Not that it buys the malicious user much:
> >> > if you can poke at MAC addresses you probably already can
> >> > break networking.
> >>
> >> I don't understand why poking at MAC address may potentially break
> >> networking.
> >
> > Set a MAC address to match another device on the same LAN,
> > packets will stop reaching that MAC.
> 
> What I meant was guest users may create a virtual link, say veth that
> has exactly the same MAC address as that for the VF, which can easily
> get around of the binding procedure.

This patchset limits binding to PCI devices so it won't be affected
by any hacks around virtual devices.

> There's no explicit flag to
> identify a VF or pass-through device AFAIK. And sometimes this happens
> maybe due to user misconfiguring the link. This process should be
> hardened to avoid from any potential configuration errors.

They are still PCI devices though.

> >
> >> Unlike VF, passthrough PCI endpoint device has its freedom
> >> to change the MAC address. Even on a VF setup it's not neccessarily
> >> always safe to assume the VF's MAC address cannot or shouldn't be
> >> changed. That depends on the specific need whether the host admin
> >> wants to restrict guest from changing the MAC address, although in
> >> most cases it's true.
> >>
> >> I understand we can use the perm_addr to distinguish. But as said,
> >> this will pose limitation of flexible configuration where one can
> >> assign VFs with identical MAC address at all while each VF belongs to
> >> different PF and/or different subnet for e.g. load balancing.
> >> And
> >> furthermore, the QEMU device model never uses MAC address to be
> >> interpreted as an identifier, which requires to be unique per VM
> >> instance. Why we're introducing this inconsistency?
> >>
> >> -Siwei
> >
> > Because it addresses most of the issues and is simple.  That's already
> > much better than what we have now which is nothing unless guest
> > configures things manually.
> 
> Did you see my QEMU patch for using BDF as the grouping identifier?

Yes. And I don't think it can work because bus numbers are
guest specified.

> And there can be others like what you suggested, but the point is that
> it's requried to support explicit grouping mechanism from day one,
> before the backup property cast into stones.

Let's start with addressing simple configs with just two NICs.

Down the road I can see possible extensions that can work: for example,
require that devices are on the same pci bridge. Or we could even make
the virtio device actually include a pci bridge (as part of same
or a child function), the PT would have to be
behind it.

As long as we are not breaking anything, adding more flags to fix
non-working configurations is always fair game.

> This is orthogonal to
> device model being proposed, be it 1-netdev or not. Delaying it would
> just mean support and compatibility burden, appearing more like a
> design flaw rather than a feature to add later on.

Well it's mostly myself who gets to support it, and I see the device
model as much more fundamental as userspace will come to depend
on it. So I'm not too worried, let's take this one step at a time.

> >
> > I think ideally the infrastructure should suppport flexible matching of
> > NICs - netvsc is already reported to be moving to some kind of serial
> > address.
> >
> As Stephen said, Hyper-V supports the serial UUID thing from day-one.
> It's just the Linux netvsc guest driver itself does not leverage that
> ID from the very beginging.
> 
> Regards,
> -Siwei

We could add something like this, too. For example,
we could add a virtual VPD capability with a UUID.

Do you know how exactly does hyperv pass the UUID for NICs?

> >
> >> >
> >> >
> >> >
> >> >
> >> >>
> >> >> >
> >> >> > --
> >> >> > MST

^ permalink raw reply

* Re: [PATCH bpf-next] bpf: Allow bpf_jit_enable = 2 with BPF_JIT_ALWAYS_ON config
From: Leo Yan @ 2018-04-26  2:28 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Alexei Starovoitov, David S. Miller, Alexei Starovoitov,
	Kirill Tkhai, netdev, linux-kernel
In-Reply-To: <77bce91b-8d92-3be2-e4d1-abbb7f4cc2dd@iogearbox.net>

On Wed, Apr 25, 2018 at 05:37:39PM +0200, Daniel Borkmann wrote:
> On 04/25/2018 04:14 PM, Alexei Starovoitov wrote:
> > On Wed, Apr 25, 2018 at 05:25:47PM +0800, Leo Yan wrote:
> >>
> >> If we have concern for security issue, should we remove support for
> >> 'bpf_jit_enable = 2' and modify the doc to reflect this change?
> > 
> > I suggest to fix the doc.
> 
> Agree, lets do that instead. Leo, could you cook a patch for that?

Sure, have sent new patch for this.

Thanks for suggestion!

> Thanks,
> Daniel

^ permalink raw reply

* [PATCH bpf-next] bpf, doc: Update bpf_jit_enable limitation for CONFIG_BPF_JIT_ALWAYS_ON
From: Leo Yan @ 2018-04-26  2:26 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, David S. Miller,
	Jonathan Corbet, netdev, linux-kernel, linux-doc
  Cc: Leo Yan

When CONFIG_BPF_JIT_ALWAYS_ON is enabled, kernel has limitation for
bpf_jit_enable, so it has fixed value 1 and we cannot set it to 2
for JIT opcode dumping; this patch is to update the doc for it.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
---
 Documentation/networking/filter.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index fd55c7d..feddab9 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -483,6 +483,12 @@ Example output from dmesg:
 [ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00
 [ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3
 
+When CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is set to 1 by default
+and it returns failure if change to any other value from proc node; this is
+for security consideration to avoid leaking info to unprivileged users. In this
+case, we can't directly dump JIT opcode image from kernel log, alternatively we
+need to use bpf tool for the dumping.
+
 In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for
 generating disassembly out of the kernel log's hexdump:
 
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH 06/40] proc: introduce proc_create_single{,_data}
From: Finn Thain @ 2018-04-26  1:45 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Alexey Dobriyan,
	Greg Kroah-Hartman, Jiri Slaby, Alessandro Zummo,
	Alexandre Belloni, linux-acpi, drbd-dev, linux-ide, netdev,
	linux-rtc, megaraidlinux.pdl, linux-scsi, devel, linux-afs,
	linux-ext4, jfs-discussion, netfilter-devel, linux-kernel
In-Reply-To: <20180425154827.32251-7-hch@lst.de>

On Wed, 25 Apr 2018, Christoph Hellwig wrote:

>  
> -/*
> - * /proc/nubus stuff
> - */
> -

I don't think that the introduction of proc_create_single{,_data} alters 
the value of that comment. That comment and similar comments in the same 
file do have a purpose, which is to keep separate the /proc/nubus 
implementation is kept separate from the /proc/bus/nubus/devices 
implementation and so on.

-- 

^ permalink raw reply

* Re: [PATCH v2] bpf, x86_32: add eBPF JIT compiler for ia32
From: Wang YanQing @ 2018-04-26  1:43 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: ast, illusionist.neo, tglx, mingo, hpa, davem, x86, netdev,
	linux-kernel
In-Reply-To: <20e1eabd-e821-8240-cb4c-6da253c49585@iogearbox.net>

On Wed, Apr 25, 2018 at 02:11:16AM +0200, Daniel Borkmann wrote:
> On 04/19/2018 05:54 PM, Wang YanQing wrote:
> > Testing results on i5-5200U:
> > 
> > 1) test_bpf: Summary: 349 PASSED, 0 FAILED, [319/341 JIT'ed]
> > 2) test_progs: Summary: 81 PASSED, 2 FAILED.
> >    test_progs report "libbpf: incorrect bpf_call opcode" for
> >    test_l4lb_noinline and test_xdp_noinline, because there is
> >    no llvm-6.0 on my machine, and current implementation doesn't
> >    support BPF_PSEUDO_CALL, so I think we can ignore the two failed
> >    testcases.
> > 3) test_lpm: OK
> > 4) test_lru_map: OK
> > 5) test_verifier: Summary: 823 PASSED, 5 FAILED
> >    test_verifier report "invalid bpf_context access off=68 size=1/2/4/8"
> >    for all the 5 FAILED testcases with/without jit, we need to fix the
> >    failed testcases themself instead of this jit.
> 
> Can you elaborate further on these? Looks like this definitely needs
> fixing on 32 bit. Would be great to get a better understanding of the
> underlying bug(s) and properly fix them.
> 
Hi Daniel Borkmann, here is the detailed log for failed testcases.

linux: Gentoo 32 bit
llvm:
    ~ # llc  --version
    LLVM (http://llvm.org/):
    LLVM version 4.0.1
    Optimized build.
    Default target: i686-pc-linux-gnu
    Host CPU: broadwell
    
    Registered Targets:
    amdgcn  - AMD GCN GPUs
    bpf     - BPF (host endian)
    bpfeb   - BPF (big endian)
    bpfel   - BPF (little endian)
    nvptx   - NVIDIA PTX 32-bit
    nvptx64 - NVIDIA PTX 64-bit
    r600    - AMD GPUs HD2XXX-HD6XXX
    x86     - 32-bit X86: Pentium-Pro and above
    x86-64  - 64-bit X86: EM64T and AMD64
    
    ~ # clang --version
    clang version 4.0.1 (tags/RELEASE_401/final)
    Target: i686-pc-linux-gnu
    Thread model: posix
    InstalledDir: /usr/lib/llvm/4/bin

kernel version:4.16.2
test program:test_verifier in kselftest
condition:bpf_jit_enable=0,bpf_jit_harden=0

log:
#172/p unpriv: spill/fill of different pointers ldx FAIL
Unexpected error message!
0: (bf) r6 = r10
1: (07) r6 += -8
2: (15) if r1 == 0x0 goto pc+3
R1=ctx(id=0,off=0,imm=0) R6=fp-8,call_-1 R10=fp0,call_-1
3: (bf) r2 = r10
4: (07) r2 += -76
5: (7b) *(u64 *)(r6 +0) = r2
6: (55) if r1 != 0x0 goto pc+1
R1=ctx(id=0,off=0,imm=0) R2=fp-76,call_-1 R6=fp-8,call_-1 R10=fp0,call_-1 fp-8=fp
7: (7b) *(u64 *)(r6 +0) = r1
8: (79) r1 = *(u64 *)(r6 +0)
9: (79) r1 = *(u64 *)(r1 +68)
invalid bpf_context access off=68 size=8

#378/p check bpf_perf_event_data->sample_period byte load permitted FAIL
Failed to load prog 'Permission denied'!
0: (b7) r0 = 0
1: (71) r0 = *(u8 *)(r1 +68)
invalid bpf_context access off=68 size=1
#379/p check bpf_perf_event_data->sample_period half load permitted FAIL
Failed to load prog 'Permission denied'!
0: (b7) r0 = 0
1: (69) r0 = *(u16 *)(r1 +68)
invalid bpf_context access off=68 size=2
#380/p check bpf_perf_event_data->sample_period word load permitted FAIL
Failed to load prog 'Permission denied'!
0: (b7) r0 = 0
1: (61) r0 = *(u32 *)(r1 +68)
invalid bpf_context access off=68 size=4
#381/p check bpf_perf_event_data->sample_period dword load permitted FAIL
Failed to load prog 'Permission denied'!
0: (b7) r0 = 0
1: (79) r0 = *(u64 *)(r1 +68)
invalid bpf_context access off=68 size=8


test program:test_progs
condition:bpf_jit_enable=0,bpf_jit_harden=0

bpf # ./test_progs
test_pkt_access:PASS:ipv4 53 nsec
test_pkt_access:PASS:ipv6 47 nsec
test_xdp:PASS:ipv4 1281 nsec
test_xdp:PASS:ipv6 749 nsec
test_l4lb:PASS:ipv4 427 nsec
test_l4lb:PASS:ipv6 562 nsec
libbpf: incorrect bpf_call opcode <= caused by ./test_l4lb_noinline.o in function test_l4lb_all 
libbpf: incorrect bpf_call opcode <= caused by ././test_xdp_noinline.o in function test_xdp_noinline


Thanks.

^ permalink raw reply

* Re: [PATCH net-next 07/14] bnxt_en: Do not allow VF to read EEPEOM.
From: Andrew Lunn @ 2018-04-26  1:38 UTC (permalink / raw)
  To: Michael Chan; +Cc: davem, netdev
In-Reply-To: <1524703257-12812-8-git-send-email-michael.chan@broadcom.com>

On Wed, Apr 25, 2018 at 08:40:50PM -0400, Michael Chan wrote:

Hi Michael

You have a typO in the Subject.

    Andrew

^ permalink raw reply

* Re: [PATCH v2 net-next 0/2] tcp: mmap: rework zerocopy receive
From: Soheil Hassas Yeganeh @ 2018-04-26  1:20 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, netdev, Andy Lutomirski, linux-kernel, linux-mm,
	Eric Dumazet
In-Reply-To: <20180425214307.159264-1-edumazet@google.com>

On Wed, Apr 25, 2018 at 5:43 PM, Eric Dumazet <edumazet@google.com> wrote:
> syzbot reported a lockdep issue caused by tcp mmap() support.
>
> I implemented Andy Lutomirski nice suggestions to resolve the
> issue and increase scalability as well.
>
> First patch is adding a new setsockopt() operation and changes mmap()
> behavior.
>
> Second patch changes tcp_mmap reference program.
>
> v2:
>  Added a missing page align of zc->length in tcp_zerocopy_receive()
>  Properly clear zc->recv_skip_hint in case user request was completed.

Acked-by: Soheil Hassas Yeganeh <soheil@google.com>

Thank you Eric for the nice redesign!

> Eric Dumazet (2):
>   tcp: add TCP_ZEROCOPY_RECEIVE support for zerocopy receive
>   selftests: net: tcp_mmap must use TCP_ZEROCOPY_RECEIVE
>
>  include/uapi/linux/tcp.h               |   8 ++
>  net/ipv4/tcp.c                         | 189 +++++++++++++------------
>  tools/testing/selftests/net/tcp_mmap.c |  63 +++++----
>  3 files changed, 142 insertions(+), 118 deletions(-)
>
> --
> 2.17.0.441.gb46fe60e1d-goog
>

^ permalink raw reply

* linux-next: manual merge of the bpf-next tree with the bpf tree
From: Stephen Rothwell @ 2018-04-26  0:53 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov, Networking
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List,
	John Fastabend

[-- Attachment #1: Type: text/plain, Size: 740 bytes --]

Hi all,

Today's linux-next merge of the bpf-next tree got a conflict in:

  samples/sockmap/Makefile

between commit:

  4dfe1bb95235 ("bpf: sockmap sample use clang flag, -target bpf")

from the bpf tree and commit:

  2e04eb1dd1ca ("bpf: sockmap, remove samples program")

from the bpf-next tree.

I fixed it up (I just removed the file) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* linux-next: manual merge of the bpf-next tree with the net-next tree
From: Stephen Rothwell @ 2018-04-26  0:49 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov, Networking, David Miller
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List, Anders Roxell

[-- Attachment #1: Type: text/plain, Size: 1120 bytes --]

Hi all,

Today's linux-next merge of the bpf-next tree got a conflict in:

  tools/testing/selftests/bpf/.gitignore

between commit:

  0abf854d7cbb ("selftests: bpf: update .gitignore with missing generated files")

from the net-next tree and commit:

  b6fd9cf796e6 ("selftests: bpf: update .gitignore with missing file")

from the bpf-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc tools/testing/selftests/bpf/.gitignore
index 5e1ab2f0eb79,da19f0562bf8..000000000000
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@@ -12,6 -12,4 +12,7 @@@ test_tcpbpf_use
  test_verifier_log
  feature
  test_libbpf_open
+ test_btf
 +test_sock
 +test_sock_addr
 +urandom_read

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH stable v4.4+] r8152: add Linksys USB3GIGV1 id
From: Grant Grundler @ 2018-04-26  0:41 UTC (permalink / raw)
  To: Krzysztof Kozlowski
  Cc: Oliver Neukum, David S. Miller, linux-usb, netdev, LKML,
	Grant Grundler
In-Reply-To: <1524650077-13270-1-git-send-email-krzk@kernel.org>

On Wed, Apr 25, 2018 at 2:54 AM, Krzysztof Kozlowski <krzk@kernel.org> wrote:
> commit 90841047a01b452cc8c3f9b990698b264143334a upstream
>
> This linksys dongle by default comes up in cdc_ether mode.
> This patch allows r8152 to claim the device:
>    Bus 002 Device 002: ID 13b1:0041 Linksys
>
> Signed-off-by: Grant Grundler <grundler@chromium.org>
> Reviewed-by: Douglas Anderson <dianders@chromium.org>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> [krzk: Rebase on v4.4]
> Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>

thanks krzk!

FTR, to support RTL8153B (HW ID 0x6010), the follow patch series to
bring r8152 v1.09.9 driver from 4.14 kernel.org to 3 (of 5) older
Chrome OS kernels:

3.14: https://chromium-review.googlesource.com/q/topic:%22update_r8152-3.14%22+(status:open%20OR%20status:merged)
3.18: https://chromium-review.googlesource.com/q/topic:%2522update-r8152-3.18%2522+(status:open+OR+status:merged)
4.4: https://chromium-review.googlesource.com/q/topic:%2522update_r8152-4.4%2522+(status:open+OR+status:merged)

caveat: These series are not suitable directly for kernel.org
submission (extraneous stuff in the commit messages, order is
different). Using the original SHA1 (in each commit message), this can
all be fixed up by hand/simple scripts.

cheers,
grant


> ---
>  drivers/net/usb/cdc_ether.c | 10 ++++++++++
>  drivers/net/usb/r8152.c     |  2 ++
>  2 files changed, 12 insertions(+)
>
> diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
> index 6578127db847..f71abe50ea6f 100644
> --- a/drivers/net/usb/cdc_ether.c
> +++ b/drivers/net/usb/cdc_ether.c
> @@ -461,6 +461,7 @@ static const struct driver_info wwan_info = {
>  #define REALTEK_VENDOR_ID      0x0bda
>  #define SAMSUNG_VENDOR_ID      0x04e8
>  #define LENOVO_VENDOR_ID       0x17ef
> +#define LINKSYS_VENDOR_ID      0x13b1
>  #define NVIDIA_VENDOR_ID       0x0955
>  #define HP_VENDOR_ID           0x03f0
>
> @@ -650,6 +651,15 @@ static const struct usb_device_id  products[] = {
>         .driver_info = 0,
>  },
>
> +#if IS_ENABLED(CONFIG_USB_RTL8152)
> +/* Linksys USB3GIGV1 Ethernet Adapter */
> +{
> +       USB_DEVICE_AND_INTERFACE_INFO(LINKSYS_VENDOR_ID, 0x0041, USB_CLASS_COMM,
> +                       USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
> +       .driver_info = 0,
> +},
> +#endif
> +
>  /* Lenovo Thinkpad USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */
>  {
>         USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x7205, USB_CLASS_COMM,
> diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
> index 89950f5cea71..b2c1a435357f 100644
> --- a/drivers/net/usb/r8152.c
> +++ b/drivers/net/usb/r8152.c
> @@ -506,6 +506,7 @@ enum rtl8152_flags {
>  #define VENDOR_ID_REALTEK              0x0bda
>  #define VENDOR_ID_SAMSUNG              0x04e8
>  #define VENDOR_ID_LENOVO               0x17ef
> +#define VENDOR_ID_LINKSYS              0x13b1
>  #define VENDOR_ID_NVIDIA               0x0955
>
>  #define MCU_TYPE_PLA                   0x0100
> @@ -4376,6 +4377,7 @@ static struct usb_device_id rtl8152_table[] = {
>         {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)},
>         {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7205)},
>         {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x304f)},
> +       {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)},
>         {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA,  0x09ff)},
>         {}
>  };
> --
> 2.7.4
>

^ permalink raw reply

* [PATCH net-next 14/14] bnxt_en: Reserve rings at driver open if none was reserved at probe time.
From: Michael Chan @ 2018-04-26  0:40 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524703257-12812-1-git-send-email-michael.chan@broadcom.com>

Add logic to reserve default rings at driver open time if none was
reserved during probe time.  This will happen when the PF driver did
not provision minimum rings to the VF, due to more limited resources.

Driver open will only succeed if some minimum rings can be reserved.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index fee1c0d..efe5c72 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6844,6 +6844,8 @@ static void bnxt_preset_reg_win(struct bnxt *bp)
 	}
 }
 
+static int bnxt_init_dflt_ring_mode(struct bnxt *bp);
+
 static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 {
 	int rc = 0;
@@ -6851,6 +6853,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 	bnxt_preset_reg_win(bp);
 	netif_carrier_off(bp->dev);
 	if (irq_re_init) {
+		/* Reserve rings now if none were reserved at driver probe. */
+		rc = bnxt_init_dflt_ring_mode(bp);
+		if (rc) {
+			netdev_err(bp->dev, "Failed to reserve default rings at open\n");
+			return rc;
+		}
 		rc = bnxt_reserve_rings(bp);
 		if (rc)
 			return rc;
@@ -8600,6 +8608,29 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
 	return rc;
 }
 
+static int bnxt_init_dflt_ring_mode(struct bnxt *bp)
+{
+	int rc;
+
+	if (bp->tx_nr_rings)
+		return 0;
+
+	rc = bnxt_set_dflt_rings(bp, true);
+	if (rc) {
+		netdev_err(bp->dev, "Not enough rings available.\n");
+		return rc;
+	}
+	rc = bnxt_init_int_mode(bp);
+	if (rc)
+		return rc;
+	bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+	if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) {
+		bp->flags |= BNXT_FLAG_RFS;
+		bp->dev->features |= NETIF_F_NTUPLE;
+	}
+	return 0;
+}
+
 int bnxt_restore_pf_fw_resources(struct bnxt *bp)
 {
 	int rc;
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 13/14] bnxt_en: Reserve RSS and L2 contexts for VF.
From: Michael Chan @ 2018-04-26  0:40 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524703257-12812-1-git-send-email-michael.chan@broadcom.com>

For completeness and correctness, the VF driver needs to reserve these
RSS and L2 contexts.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c       |  4 ++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 10 +++++-----
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h |  5 +++++
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0884e49..fee1c0d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4713,6 +4713,10 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
 
 	__bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
 				     cp_rings, vnics);
+	req.enables |= cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
+				   FUNC_VF_CFG_REQ_ENABLES_NUM_L2_CTXS);
+	req.num_rsscos_ctxs = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
+	req.num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 18ee471..cc21d87 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -462,13 +462,13 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
 	vf_vnics = hw_resc->max_vnics - bp->nr_vnics;
 	vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
 
-	req.min_rsscos_ctx = cpu_to_le16(1);
-	req.max_rsscos_ctx = cpu_to_le16(1);
+	req.min_rsscos_ctx = cpu_to_le16(BNXT_VF_MIN_RSS_CTX);
+	req.max_rsscos_ctx = cpu_to_le16(BNXT_VF_MAX_RSS_CTX);
 	if (pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL) {
 		req.min_cmpl_rings = cpu_to_le16(1);
 		req.min_tx_rings = cpu_to_le16(1);
 		req.min_rx_rings = cpu_to_le16(1);
-		req.min_l2_ctxs = cpu_to_le16(1);
+		req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MIN_L2_CTX);
 		req.min_vnics = cpu_to_le16(1);
 		req.min_stat_ctx = cpu_to_le16(1);
 		req.min_hw_ring_grps = cpu_to_le16(1);
@@ -483,7 +483,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
 		req.min_cmpl_rings = cpu_to_le16(vf_cp_rings);
 		req.min_tx_rings = cpu_to_le16(vf_tx_rings);
 		req.min_rx_rings = cpu_to_le16(vf_rx_rings);
-		req.min_l2_ctxs = cpu_to_le16(4);
+		req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
 		req.min_vnics = cpu_to_le16(vf_vnics);
 		req.min_stat_ctx = cpu_to_le16(vf_stat_ctx);
 		req.min_hw_ring_grps = cpu_to_le16(vf_ring_grps);
@@ -491,7 +491,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
 	req.max_cmpl_rings = cpu_to_le16(vf_cp_rings);
 	req.max_tx_rings = cpu_to_le16(vf_tx_rings);
 	req.max_rx_rings = cpu_to_le16(vf_rx_rings);
-	req.max_l2_ctxs = cpu_to_le16(4);
+	req.max_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
 	req.max_vnics = cpu_to_le16(vf_vnics);
 	req.max_stat_ctx = cpu_to_le16(vf_stat_ctx);
 	req.max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index 6f6d850..e9b20cd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -23,6 +23,11 @@
 	((offsetof(struct hwrm_reject_fwd_resp_input, encap_request) + n) >\
 	 offsetof(struct hwrm_reject_fwd_resp_input, encap_resp_target_id))
 
+#define BNXT_VF_MIN_RSS_CTX	1
+#define BNXT_VF_MAX_RSS_CTX	1
+#define BNXT_VF_MIN_L2_CTX	1
+#define BNXT_VF_MAX_L2_CTX	4
+
 int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *);
 int bnxt_set_vf_mac(struct net_device *, int, u8 *);
 int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 12/14] bnxt_en: Don't reserve rings on VF when min rings were not provisioned by PF.
From: Michael Chan @ 2018-04-26  0:40 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1524703257-12812-1-git-send-email-michael.chan@broadcom.com>

When rings are more limited and the PF has not provisioned minimum
guaranteed rings to the VF, do not reserve rings during driver probe.
Wait till device open before reserving rings when they will be used.
Device open will succeed if some minimum rings can be successfully
reserved and allocated.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index a45e692..0884e49 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5952,6 +5952,9 @@ static int bnxt_init_msix(struct bnxt *bp)
 	if (total_vecs > max)
 		total_vecs = max;
 
+	if (!total_vecs)
+		return 0;
+
 	msix_ent = kcalloc(total_vecs, sizeof(struct msix_entry), GFP_KERNEL);
 	if (!msix_ent)
 		return -ENOMEM;
@@ -7276,6 +7279,25 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
 	return rc;
 }
 
+static bool bnxt_can_reserve_rings(struct bnxt *bp)
+{
+#ifdef CONFIG_BNXT_SRIOV
+	if ((bp->flags & BNXT_FLAG_NEW_RM) && BNXT_VF(bp)) {
+		struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+
+		/* No minimum rings were provisioned by the PF.  Don't
+		 * reserve rings by default when device is down.
+		 */
+		if (hw_resc->min_tx_rings || hw_resc->resv_tx_rings)
+			return true;
+
+		if (!netif_running(bp->dev))
+			return false;
+	}
+#endif
+	return true;
+}
+
 /* If the chip and firmware supports RFS */
 static bool bnxt_rfs_supported(struct bnxt *bp)
 {
@@ -7292,7 +7314,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
 #ifdef CONFIG_RFS_ACCEL
 	int vnics, max_vnics, max_rss_ctxs;
 
-	if (!(bp->flags & BNXT_FLAG_MSIX_CAP))
+	if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp))
 		return false;
 
 	vnics = 1 + bp->rx_nr_rings;
@@ -8526,6 +8548,9 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
 {
 	int dflt_rings, max_rx_rings, max_tx_rings, rc;
 
+	if (!bnxt_can_reserve_rings(bp))
+		return 0;
+
 	if (sh)
 		bp->flags |= BNXT_FLAG_SHARED_RINGS;
 	dflt_rings = netif_get_num_default_rss_queues();
-- 
1.8.3.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox