Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH bpf-next v3 7/7] selftests/bpf: support BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP
From: Eric Dumazet @ 2019-08-27 18:04 UTC (permalink / raw)
  To: Stanislav Fomichev, netdev, bpf
  Cc: davem, ast, daniel, Petar Penkov, Willem de Bruijn, Song Liu
In-Reply-To: <20190725225231.195090-8-sdf@google.com>



On 7/26/19 12:52 AM, Stanislav Fomichev wrote:
> Exit as soon as we found that packet is encapped when
> BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP is passed.
> Add appropriate selftest cases.
> 
> v2:
> * Subtract sizeof(struct iphdr) from .iph_inner.tot_len (Willem de Bruijn)
> 
> Acked-by: Petar Penkov <ppenkov@google.com>
> Acked-by: Willem de Bruijn <willemb@google.com>
> Acked-by: Song Liu <songliubraving@fb.com>
> Cc: Song Liu <songliubraving@fb.com>
> Cc: Willem de Bruijn <willemb@google.com>
> Cc: Petar Penkov <ppenkov@google.com>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  .../selftests/bpf/prog_tests/flow_dissector.c | 64 +++++++++++++++++++
>  tools/testing/selftests/bpf/progs/bpf_flow.c  |  8 +++
>  2 files changed, 72 insertions(+)
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
> index ef83f145a6f1..700d73d2f22a 100644
> --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
> +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
> @@ -41,6 +41,13 @@ struct ipv4_pkt {
>  	struct tcphdr tcp;
>  } __packed;
>  
> +struct ipip_pkt {
> +	struct ethhdr eth;
> +	struct iphdr iph;
> +	struct iphdr iph_inner;
> +	struct tcphdr tcp;
> +} __packed;
> +
>  struct svlan_ipv4_pkt {
>  	struct ethhdr eth;
>  	__u16 vlan_tci;
> @@ -82,6 +89,7 @@ struct test {
>  	union {
>  		struct ipv4_pkt ipv4;
>  		struct svlan_ipv4_pkt svlan_ipv4;
> +		struct ipip_pkt ipip;
>  		struct ipv6_pkt ipv6;
>  		struct ipv6_frag_pkt ipv6_frag;
>  		struct dvlan_ipv6_pkt dvlan_ipv6;
> @@ -303,6 +311,62 @@ struct test tests[] = {
>  		},
>  		.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
>  	},
> +	{
> +		.name = "ipip-encap",
> +		.pkt.ipip = {
> +			.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
> +			.iph.ihl = 5,
> +			.iph.protocol = IPPROTO_IPIP,
> +			.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
> +			.iph_inner.ihl = 5,
> +			.iph_inner.protocol = IPPROTO_TCP,
> +			.iph_inner.tot_len =
> +				__bpf_constant_htons(MAGIC_BYTES) -
> +				sizeof(struct iphdr),
> +			.tcp.doff = 5,
> +			.tcp.source = 80,
> +			.tcp.dest = 8080,
> +		},
> +		.keys = {
> +			.nhoff = 0,
> +			.nhoff = ETH_HLEN,

clang emits a warning because nhoff is defined twice.

> +			.thoff = ETH_HLEN + sizeof(struct iphdr) +
> +				sizeof(struct iphdr),
> +			.addr_proto = ETH_P_IP,
> +			.ip_proto = IPPROTO_TCP,
> +			.n_proto = __bpf_constant_htons(ETH_P_IP),
> +			.is_encap = true,
> +			.sport = 80,
> +			.dport = 8080,
> +		},
> +	},
> +	{
> +		.name = "ipip-no-encap",
> +		.pkt.ipip = {
> +			.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
> +			.iph.ihl = 5,
> +			.iph.protocol = IPPROTO_IPIP,
> +			.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
> +			.iph_inner.ihl = 5,
> +			.iph_inner.protocol = IPPROTO_TCP,
> +			.iph_inner.tot_len =
> +				__bpf_constant_htons(MAGIC_BYTES) -
> +				sizeof(struct iphdr),
> +			.tcp.doff = 5,
> +			.tcp.source = 80,
> +			.tcp.dest = 8080,
> +		},
> +		.keys = {
> +			.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
> +			.nhoff = ETH_HLEN,
> +			.thoff = ETH_HLEN + sizeof(struct iphdr),
> +			.addr_proto = ETH_P_IP,
> +			.ip_proto = IPPROTO_IPIP,
> +			.n_proto = __bpf_constant_htons(ETH_P_IP),
> +			.is_encap = true,
> +		},
> +		.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
> +	},
>  };
>  
>  static int create_tap(const char *ifname)
> diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
> index 7fbfa22f33df..08bd8b9d58d0 100644
> --- a/tools/testing/selftests/bpf/progs/bpf_flow.c
> +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
> @@ -167,9 +167,15 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
>  		return export_flow_keys(keys, BPF_OK);
>  	case IPPROTO_IPIP:
>  		keys->is_encap = true;
> +		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
> +			return export_flow_keys(keys, BPF_OK);
> +
>  		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
>  	case IPPROTO_IPV6:
>  		keys->is_encap = true;
> +		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
> +			return export_flow_keys(keys, BPF_OK);
> +
>  		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
>  	case IPPROTO_GRE:
>  		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
> @@ -189,6 +195,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
>  			keys->thoff += 4; /* Step over sequence number */
>  
>  		keys->is_encap = true;
> +		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
> +			return export_flow_keys(keys, BPF_OK);
>  
>  		if (gre->proto == bpf_htons(ETH_P_TEB)) {
>  			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
> 

^ permalink raw reply

* [PATCH] r8152: Add rx_buf_sz field to struct r8152
From: Prashant Malani @ 2019-08-27 18:01 UTC (permalink / raw)
  To: hayeswang, davem; +Cc: grundler, netdev, nic_swsd, Prashant Malani

tp->rx_buf_sz is set according to the specific version of HW being used.

agg_buf_sz was originally added to support LSO (Large Send Offload) and
then seems to have been co-opted for LRO (Large Receive Offload). But RX
large buffer size can be larger than TX large buffer size with newer HW.
Using larger buffers can result in fewer "large RX packets" processed
by the rest of the networking stack to reduce RX CPU utilization.

This patch is copied from the r8152 driver (v2.12.0) published by
Realtek (www.realtek.com).

Signed-off-by: Prashant Malani <pmalani@chromium.org>
Reviewed-by: Grant Grundler <grundler@chromium.org>
---
 drivers/net/usb/r8152.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 0cc03a9ff545..d221e59a5392 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -750,6 +750,7 @@ struct r8152 {
 	u32 tx_qlen;
 	u32 coalesce;
 	u16 ocp_base;
+	u32 rx_buf_sz;
 	u16 speed;
 	u8 *intr_buff;
 	u8 version;
@@ -1516,13 +1517,13 @@ static int alloc_all_mem(struct r8152 *tp)
 	skb_queue_head_init(&tp->rx_queue);
 
 	for (i = 0; i < RTL8152_MAX_RX; i++) {
-		buf = kmalloc_node(agg_buf_sz, GFP_KERNEL, node);
+		buf = kmalloc_node(tp->rx_buf_sz, GFP_KERNEL, node);
 		if (!buf)
 			goto err1;
 
 		if (buf != rx_agg_align(buf)) {
 			kfree(buf);
-			buf = kmalloc_node(agg_buf_sz + RX_ALIGN, GFP_KERNEL,
+			buf = kmalloc_node(tp->rx_buf_sz + RX_ALIGN, GFP_KERNEL,
 					   node);
 			if (!buf)
 				goto err1;
@@ -2113,7 +2114,7 @@ int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags)
 		return 0;
 
 	usb_fill_bulk_urb(agg->urb, tp->udev, usb_rcvbulkpipe(tp->udev, 1),
-			  agg->head, agg_buf_sz,
+			  agg->head, tp->rx_buf_sz,
 			  (usb_complete_t)read_bulk_callback, agg);
 
 	ret = usb_submit_urb(agg->urb, mem_flags);
@@ -2447,7 +2448,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
 
 static void r8153_set_rx_early_size(struct r8152 *tp)
 {
-	u32 ocp_data = agg_buf_sz - rx_reserved_size(tp->netdev->mtu);
+	u32 ocp_data = tp->rx_buf_sz - rx_reserved_size(tp->netdev->mtu);
 
 	switch (tp->version) {
 	case RTL_VER_03:
@@ -5115,6 +5116,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->in_nway		= rtl8152_in_nway;
 		ops->hw_phy_cfg		= r8152b_hw_phy_cfg;
 		ops->autosuspend_en	= rtl_runtime_suspend_enable;
+		tp->rx_buf_sz		= 16 * 1024;
 		break;
 
 	case RTL_VER_03:
@@ -5132,6 +5134,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->in_nway		= rtl8153_in_nway;
 		ops->hw_phy_cfg		= r8153_hw_phy_cfg;
 		ops->autosuspend_en	= rtl8153_runtime_enable;
+		tp->rx_buf_sz		= 32 * 1024;
 		break;
 
 	case RTL_VER_08:
@@ -5147,6 +5150,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->in_nway		= rtl8153_in_nway;
 		ops->hw_phy_cfg		= r8153b_hw_phy_cfg;
 		ops->autosuspend_en	= rtl8153b_runtime_enable;
+		tp->rx_buf_sz		= 32 * 1024;
 		break;
 
 	default:
-- 
2.23.0.187.g17f5b7556c-goog


^ permalink raw reply related

* RE: [E1000-devel] SFP+ EEPROM readouts fail on X722 (ethtool -m: Invalid argument)
From: Fujinaka, Todd @ 2019-08-27 17:56 UTC (permalink / raw)
  To: Jakub Jankowski, e1000-devel@lists.sourceforge.net
  Cc: netdev@vger.kernel.org, mhemsley@open-systems.com
In-Reply-To: <ec481f17-cbf4-589d-807f-736421391c71@toxcorp.com>

The hints should be:
# ethtool -m eth10
Cannot get module EEPROM information: Invalid argument # dmesg | tail -n 1 [  445.971974] i40e 0000:3d:00.3 eth10: Module EEPROM memory read not supported. Please update the NVM image.

# ethtool -i eth10
driver: i40e
version: 2.9.21
firmware-version: 3.31 0x80000d31 1.1767.0

And the working case:
# ethtool -i eth8
driver: i40e
version: 2.9.21
firmware-version: 6.01 0x800035cf 1.1876.0

If you don't see it, 6.01 > 3.31.

The NVM update tool should be available on downloadcenter.intel.com

Todd Fujinaka
Software Application Engineer
Datacenter Engineering Group
Intel Corporation
todd.fujinaka@intel.com


-----Original Message-----
From: Jakub Jankowski [mailto:shasta@toxcorp.com] 
Sent: Tuesday, August 27, 2019 4:03 AM
To: e1000-devel@lists.sourceforge.net
Cc: netdev@vger.kernel.org; shasta@toxcorp.com; mhemsley@open-systems.com
Subject: [E1000-devel] SFP+ EEPROM readouts fail on X722 (ethtool -m: Invalid argument)

Hi,

We can't get SFP+ EEPROM readouts for X722 to work at all:

# ethtool -m eth10
Cannot get module EEPROM information: Invalid argument # dmesg | tail -n 1 [  445.971974] i40e 0000:3d:00.3 eth10: Module EEPROM memory read not supported. Please update the NVM image.
# lspci | grep 3d:00.3
3d:00.3 Ethernet controller: Intel Corporation Ethernet Connection X722 for 10GbE SFP+ (rev 09)


We're running 4.19.65 kernel at the moment, testing using the newest out-of-tree Intel module

# modinfo -F version i40e
2.9.21

We also tried:
- 4.19.65 with in-tree i40e (2.3.2-k)
- stock Arch Linux (kernel 5.2.5, driver 2.8.20-k) and the results are the same, as shown above.

# ethtool -i eth10
driver: i40e
version: 2.9.21
firmware-version: 3.31 0x80000d31 1.1767.0
expansion-rom-version:
bus-info: 0000:3d:00.3
supports-statistics: yes
supports-test: yes
supports-eeprom-access: yes
supports-register-dump: yes
supports-priv-flags: yes
# dmidecode -s baseboard-manufacturer
Intel Corporation
# dmidecode -s baseboard-product-name
S2600WFT
# dmidecode -s baseboard-version
H48104-853

# lspci -vvv
(...)
3d:00.3 Ethernet controller: Intel Corporation Ethernet Connection X722 for 10GbE SFP+ (rev 09)
	DeviceName: Intel PCH Integrated 10 Gigabit Ethernet Controller
	Subsystem: Intel Corporation Ethernet Connection X722 for 10GbE SFP+
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx+
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 32 bytes
	Interrupt: pin A routed to IRQ 112
	NUMA node: 0
	Region 0: Memory at ab000000 (64-bit, prefetchable) [size=16M]
	Region 3: Memory at b0000000 (64-bit, prefetchable) [size=32K]
	Expansion ROM at <ignored> [disabled]
	Capabilities: [40] Power Management version 3
		Flags: PMEClk- DSI+ D1- D2- AuxCurrent=0mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
		Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=1 PME-
	Capabilities: [50] MSI: Enable- Count=1/1 Maskable+ 64bit+
		Address: 0000000000000000  Data: 0000
		Masking: 00000000  Pending: 00000000
	Capabilities: [70] MSI-X: Enable+ Count=129 Masked-
		Vector table: BAR=3 offset=00000000
		PBA: BAR=3 offset=00001000
	Capabilities: [a0] Express (v2) Endpoint, MSI 00
		DevCap:	MaxPayload 512 bytes, PhantFunc 0, Latency L0s <512ns, L1 <64us
			ExtTag+ AttnBtn- AttnInd- PwrInd- RBE+ FLReset+ SlotPowerLimit 0.000W
		DevCtl:	CorrErr+ NonFatalErr+ FatalErr+ UnsupReq+
			RlxdOrd+ ExtTag+ PhantFunc- AuxPwr- NoSnoop- FLReset-
			MaxPayload 256 bytes, MaxReadReq 512 bytes
		DevSta:	CorrErr+ NonFatalErr- FatalErr- UnsupReq+ AuxPwr+ TransPend-
		LnkCap:	Port #0, Speed 2.5GT/s, Width x1, ASPM L0s L1, Exit Latency L0s <64ns, L1 <1us
			ClockPM- Surprise- LLActRep- BwNot- ASPMOptComp+
		LnkCtl:	ASPM Disabled; RCB 64 bytes Disabled- CommClk+
			ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
		LnkSta:	Speed 2.5GT/s (ok), Width x1 (ok)
			TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
		DevCap2: Completion Timeout: Range AB, TimeoutDis+, LTR-, OBFF Not Supported
			 AtomicOpsCap: 32bit- 64bit- 128bitCAS-
		DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR-, OBFF Disabled
			 AtomicOpsCtl: ReqEn-
		LnkSta2: Current De-emphasis Level: -6dB, EqualizationComplete-, EqualizationPhase1-
			 EqualizationPhase2-, EqualizationPhase3-, LinkEqualizationRequest-
	Capabilities: [e0] Vital Product Data
		Product Name: Example VPD
		Read-only fields:
			[V0] Vendor specific:
			[RV] Reserved: checksum good, 0 byte(s) reserved
		End
	Capabilities: [100 v2] Advanced Error Reporting
		UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
		UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq+ ACSViol-
		UESvrt:	DLP+ SDES- TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+ ECRC- UnsupReq- ACSViol-
		CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+
		CEMsk:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+
		AERCap:	First Error Pointer: 00, ECRCGenCap+ ECRCGenEn- ECRCChkCap+ ECRCChkEn-
			MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
		HeaderLog: 00000000 00000000 00000000 00000000
	Capabilities: [150 v1] Alternative Routing-ID Interpretation (ARI)
		ARICap:	MFVC- ACS-, Next Function: 0
		ARICtl:	MFVC- ACS-, Function Group: 0
	Capabilities: [160 v1] Single Root I/O Virtualization (SR-IOV)
		IOVCap:	Migration-, Interrupt Message Number: 000
		IOVCtl:	Enable- Migration- Interrupt- MSE- ARIHierarchy-
		IOVSta:	Migration-
		Initial VFs: 32, Total VFs: 32, Number of VFs: 0, Function Dependency Link: 03
		VF offset: 109, stride: 1, Device ID: 37cd
		Supported Page Size: 00000553, System Page Size: 00000001
		Region 0: Memory at 00000000af000000 (64-bit, prefetchable)
		Region 3: Memory at 00000000b0020000 (64-bit, prefetchable)
		VF Migration: offset: 00000000, BIR: 0
	Capabilities: [1a0 v1] Transaction Processing Hints
		Device specific mode supported
		No steering table available
	Capabilities: [1b0 v1] Access Control Services
		ACSCap:	SrcValid- TransBlk- ReqRedir- CmpltRedir- UpstreamFwd- EgressCtrl- DirectTrans-
		ACSCtl:	SrcValid- TransBlk- ReqRedir- CmpltRedir- UpstreamFwd- EgressCtrl- DirectTrans-
	Kernel driver in use: i40e
	Kernel modules: i40e


Same kernel+i40e, same SFP+ module - but on Intel X710, works like a treat:

# lspci | grep X7
81:00.0 Ethernet controller: Intel Corporation Ethernet Controller X710 for 10GbE SFP+ (rev 01)
81:00.1 Ethernet controller: Intel Corporation Ethernet Controller X710 for 10GbE SFP+ (rev 01) # ethtool -m eth8
	Identifier                                : 0x03 (SFP)
	Extended identifier                       : 0x04 (GBIC/SFP defined by 2-wire interface ID)
	Connector                                 : 0x07 (LC)
	Transceiver codes                         : 0x10 0x00 0x00 0x01 0x00 0x00 0x00 0x00 0x00
	Transceiver type                          : 10G Ethernet: 10G Base-SR
	Transceiver type                          : Ethernet: 1000BASE-SX
	Encoding                                  : 0x06 (64B/66B)
	BR, Nominal                               : 10300MBd
         (...)
# ethtool -i eth8
driver: i40e
version: 2.9.21
firmware-version: 6.01 0x800035cf 1.1876.0
expansion-rom-version:
bus-info: 0000:81:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: yes
supports-register-dump: yes
supports-priv-flags: yes
#


Is this a known problem?


Best regards,
  Jakub



_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* Re: [PATCH net] ipv6: Default fib6_type to RTN_UNICAST when not set
From: David Ahern @ 2019-08-27 17:51 UTC (permalink / raw)
  To: Joakim Tjernlund, netdev@vger.kernel.org, greg@kroah.com
  Cc: stable@vger.kernel.org, David Miller
In-Reply-To: <db87d29f160302789f239cda2074ed35ae67da62.camel@infinera.com>

On 8/27/19 11:24 AM, Joakim Tjernlund wrote:
> On Tue, 2019-08-27 at 19:07 +0200, Greg KH wrote:
>>
>> On Tue, Aug 27, 2019 at 08:33:28AM +0000, Joakim Tjernlund wrote:
>>> I don't see the above patch in stable yet, is it still queued?
>>> https://nam03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.spinics.net%2Flists%2Fnetdev%2Fmsg579581.html&amp;data=02%7C01%7CJoakim.Tjernlund%40infinera.com%7Ce70efa27d90b4eecb1cf08d72b110e78%7C285643de5f5b4b03a1530ae2dc8aaf77%7C1%7C1%7C637025224574216531&amp;sdata=Mhu0BqlM21XXYdR%2BC%2F8wXrMkzBKJpKUZZZXz57sAyuQ%3D&amp;reserved=0
>>
>> Ask the network developers :)
> 
> OK, asking netdev then.
> 
>  Jocke
> 

Dave:

Specific request is for commit c7036d97acd2527cef145b5ef9ad1a37ed21bbe6
("ipv6: Default fib6_type to RTN_UNICAST when not set") to be queued for
stable releases prior to v5.2

^ permalink raw reply

* Re: [PATCH 0/4] Introduce variable length mdev alias
From: Alex Williamson @ 2019-08-27 17:48 UTC (permalink / raw)
  To: Parav Pandit
  Cc: Jiri Pirko, kwankhede@nvidia.com, cohuck@redhat.com,
	davem@davemloft.net, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <AM0PR05MB4866A24FF3D283F0F3CB3CDAD1A00@AM0PR05MB4866.eurprd05.prod.outlook.com>

On Tue, 27 Aug 2019 13:11:17 +0000
Parav Pandit <parav@mellanox.com> wrote:

> Hi Alex, Cornelia,
> 
> > -----Original Message-----
> > From: kvm-owner@vger.kernel.org <kvm-owner@vger.kernel.org> On Behalf
> > Of Parav Pandit
> > Sent: Tuesday, August 27, 2019 2:11 AM
> > To: alex.williamson@redhat.com; Jiri Pirko <jiri@mellanox.com>;
> > kwankhede@nvidia.com; cohuck@redhat.com; davem@davemloft.net
> > Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org;
> > netdev@vger.kernel.org; Parav Pandit <parav@mellanox.com>
> > Subject: [PATCH 0/4] Introduce variable length mdev alias
> > 
> > To have consistent naming for the netdevice of a mdev and to have consistent
> > naming of the devlink port [1] of a mdev, which is formed using
> > phys_port_name of the devlink port, current UUID is not usable because UUID
> > is too long.
> > 
> > UUID in string format is 36-characters long and in binary 128-bit.
> > Both formats are not able to fit within 15 characters limit of netdev name.
> > 
> > It is desired to have mdev device naming consistent using UUID.
> > So that widely used user space framework such as ovs [2] can make use of
> > mdev representor in similar way as PCIe SR-IOV VF and PF representors.
> > 
> > Hence,
> > (a) mdev alias is created which is derived using sha1 from the mdev name.
> > (b) Vendor driver describes how long an alias should be for the child mdev
> > created for a given parent.
> > (c) Mdev aliases are unique at system level.
> > (d) alias is created optionally whenever parent requested.
> > This ensures that non networking mdev parents can function without alias
> > creation overhead.
> > 
> > This design is discussed at [3].
> > 
> > An example systemd/udev extension will have,
> > 
> > 1. netdev name created using mdev alias available in sysfs.
> > 
> > mdev UUID=83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
> > mdev 12 character alias=cd5b146a80a5
> > 
> > netdev name of this mdev = enmcd5b146a80a5 Here en = Ethernet link m =
> > mediated device
> > 
> > 2. devlink port phys_port_name created using mdev alias.
> > devlink phys_port_name=pcd5b146a80a5
> > 
> > This patchset enables mdev core to maintain unique alias for a mdev.
> > 
> > Patch-1 Introduces mdev alias using sha1.
> > Patch-2 Ensures that mdev alias is unique in a system.
> > Patch-3 Exposes mdev alias in a sysfs hirerchy.
> > Patch-4 Extends mtty driver to optionally provide alias generation.
> > This also enables to test UUID based sha1 collision and trigger error handling
> > for duplicate sha1 results.
> > 
> > In future when networking driver wants to use mdev alias, mdev_alias() API will
> > be added to derive devlink port name.
> >   
> Now that majority of above patches looks in shape and I addressed all comments,
> In next v1 post, I was considering to include mdev_alias() and have
> example use in mtty driver.
> 
> This way, subsequent series of mlx5_core who intents to use
> mdev_alias() API makes it easy to review and merge through Dave M,
> netdev tree. Is that ok with you?

What would be the timing for the mlx5_core use case?  Can we coordinate
within the same development cycle?  I wouldn't want someone to come
clean up the sample driver and remove the API ;)  Thanks,

Alex

^ permalink raw reply

* Re: [PATCH v5 net-next 02/18] ionic: Add hardware init and device commands
From: Shannon Nelson @ 2019-08-27 17:39 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: netdev, davem
In-Reply-To: <20190827022628.GD13411@lunn.ch>

On 8/26/19 7:26 PM, Andrew Lunn wrote:
> On Mon, Aug 26, 2019 at 02:33:23PM -0700, Shannon Nelson wrote:
>> +void ionic_debugfs_add_dev(struct ionic *ionic)
>> +{
>> +	struct dentry *dentry;
>> +
>> +	dentry = debugfs_create_dir(ionic_bus_info(ionic), ionic_dir);
>> +	if (IS_ERR_OR_NULL(dentry))
>> +		return;
>> +
>> +	ionic->dentry = dentry;
>> +}
> Hi Shannon
>
> There was recently a big patchset from GregKH which removed all error
> checking from drivers calling debugfs calls. I'm pretty sure you don't
> need this check here.

With this check I end up either with a valid dentry value or NULL in 
ionic->dentry.  Without this check I possibly get an error value in 
ionic->dentry, which can get used later as the parent dentry to try to 
make a new debugfs node.  Some quick tracing looks like this error value 
will get dereferenced in a call to inode_lock(), which would likely 
cause us some heartburn.

I'd prefer to keep this check and leave ionic->dentry as NULL.

I've removed several of the other error checks and messages that were in 
this code, but left a few of the IS_ERR_OR_NULL checks to be sure we 
don't try dereferencing similar bogus values.

>
>> +#ifdef CONFIG_DEBUG_FS
>> +
>> +void ionic_debugfs_create(void);
>> +void ionic_debugfs_destroy(void);
>> +void ionic_debugfs_add_dev(struct ionic *ionic);
>> +void ionic_debugfs_del_dev(struct ionic *ionic);
>> +void ionic_debugfs_add_ident(struct ionic *ionic);
>> +#else
>> +static inline void ionic_debugfs_create(void) { }
>> +static inline void ionic_debugfs_destroy(void) { }
>> +static inline void ionic_debugfs_add_dev(struct ionic *ionic) { }
>> +static inline void ionic_debugfs_del_dev(struct ionic *ionic) { }
>> +static inline void ionic_debugfs_add_ident(struct ionic *ionic) { }
>> +#endif
> Is this really needed? I would expect there to be stubs for all the
> debugfs calls if it is disabled.

If CONFIG_DEBUG_FS is not enabled, I would prefer this driver's debugfs 
code to also be left out rather than be compiled in and left useless.

>
>> +/**
>> + * union drv_identity - driver identity information
>> + * @os_type:          OS type (see enum os_type)
>> + * @os_dist:          OS distribution, numeric format
>> + * @os_dist_str:      OS distribution, string format
>> + * @kernel_ver:       Kernel version, numeric format
>> + * @kernel_ver_str:   Kernel version, string format
>> + * @driver_ver_str:   Driver version, string format
>> + */
>> +union ionic_drv_identity {
>> +	struct {
>> +		__le32 os_type;
>> +		__le32 os_dist;
>> +		char   os_dist_str[128];
>> +		__le32 kernel_ver;
>> +		char   kernel_ver_str[32];
>> +		char   driver_ver_str[32];
>> +	};
>> +	__le32 words[512];
>> +};
>> +int ionic_identify(struct ionic *ionic)
>> +{
>> +	struct ionic_identity *ident = &ionic->ident;
>> +	struct ionic_dev *idev = &ionic->idev;
>> +	size_t sz;
>> +	int err;
>> +
>> +	memset(ident, 0, sizeof(*ident));
>> +
>> +	ident->drv.os_type = cpu_to_le32(IONIC_OS_TYPE_LINUX);
>> +	ident->drv.os_dist = 0;
>> +	strncpy(ident->drv.os_dist_str, utsname()->release,
>> +		sizeof(ident->drv.os_dist_str) - 1);
>> +	ident->drv.kernel_ver = cpu_to_le32(LINUX_VERSION_CODE);
>> +	strncpy(ident->drv.kernel_ver_str, utsname()->version,
>> +		sizeof(ident->drv.kernel_ver_str) - 1);
>> +	strncpy(ident->drv.driver_ver_str, IONIC_DRV_VERSION,
>> +		sizeof(ident->drv.driver_ver_str) - 1);
>> +
>> +	mutex_lock(&ionic->dev_cmd_lock);
>> +
> I don't know about others, but from a privacy prospective, i'm not so
> happy about this. This is a smart NIC. It could be reporting back to
> Mothership pensando with this information?

I suppose the phrase "you can trust us" wouldn't help much here, would 
it... :-)

>
> I would be happier if there was a privacy statement, right here,
> saying what this information is used for, and an agreement it is not
> used for anything else. If that gets violated, you can then only blame
> yourself when we ripe this out and hard code it to static values.

That makes perfect sense.

I can add a full description here of how the information will be used, 
which should help most folks, but I'm sure there will still be some that 
don't want this info released.

What I'd like to propose here is that I do the hardcoded strings myself 
for now, and I work up a way for the users to enable the feature as 
desired, with a reasonable comment here in the code and in the 
Documentation/.../ionic.rst file.  This might end up as an ethtool 
priv-flag that defaults to off and can set a NIC value that is 
remembered for later.

Does that sound reasonable?

Cheers,
sln



^ permalink raw reply

* Re: [PATCH v2 1/1] netfilter: nf_tables: fib: Drop IPV6 packages if IPv6 is disabled on boot
From: Leonardo Bras @ 2019-08-27 17:34 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: netfilter-devel, coreteam, netdev, linux-kernel, Jozsef Kadlecsik,
	Florian Westphal, David S. Miller, Alexey Kuznetsov,
	Hideaki YOSHIFUJI
In-Reply-To: <20190827103541.vzwqwg4jlbuzajxu@salvia>

[-- Attachment #1: Type: text/plain, Size: 1912 bytes --]

On Tue, 2019-08-27 at 12:35 +0200, Pablo Neira Ayuso wrote:
> On Wed, Aug 21, 2019 at 11:15:06AM -0300, Leonardo Bras wrote:
> > If IPv6 is disabled on boot (ipv6.disable=1), but nft_fib_inet ends up
> > dealing with a IPv6 package, it causes a kernel panic in
> > fib6_node_lookup_1(), crashing in bad_page_fault.
> 
> Q: How do you get to see IPv6 packets if IPv6 module is disable?
I could reproduce this bug on a host ('ipv6.disable=1') starting a
guest with a virtio-net interface with 'filterref' over a virtual
bridge. It crashes the host during guest boot (just before login).

By that I could understand that a guest IPv6 network traffic (viavirtio-net) may cause this kernel panic. 

> 
> > The panic is caused by trying to deference a very low address (0x38
> > in ppc64le), due to ipv6.fib6_main_tbl = NULL.
> > BUG: Kernel NULL pointer dereference at 0x00000038
> > 
> > Fix this behavior by dropping IPv6 packages if !ipv6_mod_enabled().
> 
> I'd suggest: s/package/packet/
Sure, I will make sure to put it on v3.
(Sorry, I am not very used to net subsystem.)
> 
> [...]
> > diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
> > index 7ece86afd079..75acc417e2ff 100644
> > --- a/net/ipv6/netfilter/nft_fib_ipv6.c
> > +++ b/net/ipv6/netfilter/nft_fib_ipv6.c
> > @@ -125,6 +125,11 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
> >  	u32 *dest = &regs->data[priv->dreg];
> >  	struct ipv6hdr *iph, _iph;
> >  
> > +	if (!ipv6_mod_enabled()) {
> > +		regs->verdict.code = NF_DROP;
> 
> NFT_BREAK instead to stop evaluating this rule, this results in a
> mismatch, so you let the user decide what to do with packets that do
> not match your policy.
Ok, I will replace for v3.

> 
> The drop case at the bottom of the fib eval function never actually
> never happens.
Which one do you mean?


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH net] ipv6: Default fib6_type to RTN_UNICAST when not set
From: Joakim Tjernlund @ 2019-08-27 17:24 UTC (permalink / raw)
  To: netdev@vger.kernel.org, greg@kroah.com; +Cc: stable@vger.kernel.org
In-Reply-To: <20190827170729.GD21369@kroah.com>

On Tue, 2019-08-27 at 19:07 +0200, Greg KH wrote:
> 
> On Tue, Aug 27, 2019 at 08:33:28AM +0000, Joakim Tjernlund wrote:
> > I don't see the above patch in stable yet, is it still queued?
> > https://nam03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.spinics.net%2Flists%2Fnetdev%2Fmsg579581.html&amp;data=02%7C01%7CJoakim.Tjernlund%40infinera.com%7Ce70efa27d90b4eecb1cf08d72b110e78%7C285643de5f5b4b03a1530ae2dc8aaf77%7C1%7C1%7C637025224574216531&amp;sdata=Mhu0BqlM21XXYdR%2BC%2F8wXrMkzBKJpKUZZZXz57sAyuQ%3D&amp;reserved=0
> 
> Ask the network developers :)

OK, asking netdev then.

 Jocke

^ permalink raw reply

* Re: [net-next] net: sched: pie: enable timestamp based delay calculation
From: Dave Taht @ 2019-08-27 16:58 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Gautam Ramakrishnan, Linux Kernel Network Developers,
	Jamal Hadi Salim, David S. Miller, Cong Wang, Leslie Monis,
	Mohit P . Tahiliani
In-Reply-To: <316fdac3-5fa9-35d5-ad74-94072f19c5fc@gmail.com>

On Tue, Aug 27, 2019 at 8:34 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 8/27/19 4:19 PM, Gautam Ramakrishnan wrote:
> > RFC 8033 suggests an alternative approach to calculate the queue
> > delay in PIE by using per packet timestamps. This patch enables the
> > PIE implementation to do this.
> >
> > The calculation of queue delay is as follows:
> >       qdelay = now - packet_enqueue_time
> >
> > To enable the use of timestamps:
> >       modprobe sch_pie use_timestamps=1
>
>
> No module parameter is accepted these days.
>
> Please add a new attribute instead,
> so that pie can be used in both mode on the same host.

I note that I think (but lack independent data) this improvement to
the rate estimator in pie should improve its usability and accuracy
in low rate or hw mq situations, and with some benchmarking to
show the cpu impact (at high and low rates) of this improvement as
well as the network
impact, the old way should probably be dropped and new way adopted without
needing a new variable to control it.

A commit showing the before/after cpu and network impact with a whole
bunch of flent benchmarks would be great.

(I'd also love to know if pie can be run with a lower target - like 5ms -
 with this mod in place)

>
> For a typical example of attribute addition, please take
> a look at commit 48872c11b77271ef9b070bdc50afe6655c4eb9aa
> ("net_sched: sch_fq: add dctcp-like marking")

utilizing ce_threshold in this way is actually independent of whether or
not you are using dctcp-style or rfc3168 style ecn marking.

It's "I'm self congesting... Dooo something! Anything, to reduce the load!"




-- 

Dave Täht
CTO, TekLibre, LLC
http://www.teklibre.com
Tel: 1-831-205-9740

^ permalink raw reply

* Re: [PATCH 1/4] mdev: Introduce sha1 based mdev alias
From: Alex Williamson @ 2019-08-27 16:50 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: Parav Pandit, Jiri Pirko, kwankhede@nvidia.com,
	davem@davemloft.net, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <20190827153510.0bd10437.cohuck@redhat.com>

On Tue, 27 Aug 2019 15:35:10 +0200
Cornelia Huck <cohuck@redhat.com> wrote:

> On Tue, 27 Aug 2019 11:57:07 +0000
> Parav Pandit <parav@mellanox.com> wrote:
> 
> > > -----Original Message-----
> > > From: Cornelia Huck <cohuck@redhat.com>
> > > Sent: Tuesday, August 27, 2019 5:11 PM
> > > To: Parav Pandit <parav@mellanox.com>
> > > Cc: alex.williamson@redhat.com; Jiri Pirko <jiri@mellanox.com>;
> > > kwankhede@nvidia.com; davem@davemloft.net; kvm@vger.kernel.org; linux-
> > > kernel@vger.kernel.org; netdev@vger.kernel.org
> > > Subject: Re: [PATCH 1/4] mdev: Introduce sha1 based mdev alias
> > > 
> > > On Tue, 27 Aug 2019 11:33:54 +0000
> > > Parav Pandit <parav@mellanox.com> wrote:
> > >     
> > > > > -----Original Message-----
> > > > > From: Cornelia Huck <cohuck@redhat.com>
> > > > > Sent: Tuesday, August 27, 2019 4:54 PM
> > > > > To: Parav Pandit <parav@mellanox.com>
> > > > > Cc: alex.williamson@redhat.com; Jiri Pirko <jiri@mellanox.com>;
> > > > > kwankhede@nvidia.com; davem@davemloft.net; kvm@vger.kernel.org;
> > > > > linux- kernel@vger.kernel.org; netdev@vger.kernel.org
> > > > > Subject: Re: [PATCH 1/4] mdev: Introduce sha1 based mdev alias
> > > > >
> > > > > On Tue, 27 Aug 2019 11:12:23 +0000
> > > > > Parav Pandit <parav@mellanox.com> wrote:
> > > > >    
> > > > > > > -----Original Message-----
> > > > > > > From: Cornelia Huck <cohuck@redhat.com>
> > > > > > > Sent: Tuesday, August 27, 2019 3:54 PM
> > > > > > > To: Parav Pandit <parav@mellanox.com>
> > > > > > > Cc: alex.williamson@redhat.com; Jiri Pirko <jiri@mellanox.com>;
> > > > > > > kwankhede@nvidia.com; davem@davemloft.net; kvm@vger.kernel.org;
> > > > > > > linux- kernel@vger.kernel.org; netdev@vger.kernel.org
> > > > > > > Subject: Re: [PATCH 1/4] mdev: Introduce sha1 based mdev alias
> > > > > > >    
> > >     
> > > > > > > What about:
> > > > > > >
> > > > > > > * @get_alias_length: optional callback to specify length of the
> > > > > > > alias to    
> > > > > create    
> > > > > > > *                    Returns unsigned integer: length of the alias to be created,
> > > > > > > *                                              0 to not create an alias
> > > > > > >    
> > > > > > Ack.
> > > > > >    
> > > > > > > I also think it might be beneficial to add a device parameter
> > > > > > > here now (rather than later); that seems to be something that makes    
> > > sense.    
> > > > > > >    
> > > > > > Without showing the use, it shouldn't be added.    
> > > > >
> > > > > It just feels like an omission: Why should the vendor driver only be
> > > > > able to return one value here, without knowing which device it is for?
> > > > > If a driver supports different devices, it may have different
> > > > > requirements for them.
> > > > >    
> > > > Sure. Lets first have this requirement to add it.
> > > > I am against adding this length field itself without an actual vendor use case,    
> > > which is adding some complexity in code today.    
> > > > But it was ok to have length field instead of bool.
> > > >
> > > > Lets not further add "no-requirement futuristic knobs" which hasn't shown its    
> > > need yet.    
> > > > When a vendor driver needs it, there is nothing prevents such addition.    
> > > 
> > > Frankly, I do not see how it adds complexity; the other callbacks have device
> > > arguments already,    
> > Other ioctls such as create, remove, mmap, likely need to access the parent.
> > Hence it make sense to have parent pointer in there.
> > 
> > I am not against complexity, I am just saying, at present there is no use-case. Let have use case and we add it.
> >   
> > > and the vendor driver is free to ignore it if it does not have
> > > a use for it. I'd rather add the argument before a possible future user tries
> > > weird hacks to allow multiple values, but I'll leave the decision to the
> > > maintainers.    
> > Why would a possible future user tries a weird hack?
> > If user needs to access parent device, that driver maintainer should ask for it.  
> 
> I've seen the situation often enough that folks tried to do hacks
> instead of enhancing the interface.
> 
> Again, let's get a maintainer opinion.

Sure, make someone else have an opinion ;)  I don't have a strong one.
The argument against a dev arg, as I see it, is that it's unused
currently, so why should we try to predict a future use case.  The
argument for, is that we're defining an API between the core and vendor
driver, where our job in defining that API could certainly be seen as
anticipating future use cases so as not to unnecessarily churn the
API.  So do we lean towards a more stable API or do we lean towards
minimalism?

when called form mdev_register_device(), the arg we'd add seems obvious
because we really have nothing more to work with than the parent
device.  But this is only a sanity test and the value there seems
questionable anyway.  If we look to the real use case in
mdev_device_create() then clearly dev stands out as a likely useful
arg, but is the type or kobj also useful?  Would we forfeit the sanity
test to include those?  I don't have a lot of confidence in being able
to predict that, so without an obvious set of args, I'm fine with the
minimalist approach provided.  Thanks,

Alex

^ permalink raw reply

* [net-next 04/15] ice: shorten local and add debug prints
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Jesse Brandeburg, netdev, nhorman, sassmann, Tony Nguyen,
	Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Jesse Brandeburg <jesse.brandeburg@intel.com>

Add some verbose debugging for dyndbg to help us when
we are having issues with link and/or PHY.

While there, shorten some strings used by locals that
were causing long line wrapping.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c | 63 ++++++++++++++-------
 1 file changed, 44 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 4b43e6de847b..302ad981129c 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -263,21 +263,23 @@ enum ice_status
 ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 		     struct ice_link_status *link, struct ice_sq_cd *cd)
 {
-	struct ice_link_status *hw_link_info_old, *hw_link_info;
 	struct ice_aqc_get_link_status_data link_data = { 0 };
 	struct ice_aqc_get_link_status *resp;
+	struct ice_link_status *li_old, *li;
 	enum ice_media_type *hw_media_type;
 	struct ice_fc_info *hw_fc_info;
 	bool tx_pause, rx_pause;
 	struct ice_aq_desc desc;
 	enum ice_status status;
+	struct ice_hw *hw;
 	u16 cmd_flags;
 
 	if (!pi)
 		return ICE_ERR_PARAM;
-	hw_link_info_old = &pi->phy.link_info_old;
+	hw = pi->hw;
+	li_old = &pi->phy.link_info_old;
 	hw_media_type = &pi->phy.media_type;
-	hw_link_info = &pi->phy.link_info;
+	li = &pi->phy.link_info;
 	hw_fc_info = &pi->fc;
 
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
@@ -286,27 +288,27 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 	resp->cmd_flags = cpu_to_le16(cmd_flags);
 	resp->lport_num = pi->lport;
 
-	status = ice_aq_send_cmd(pi->hw, &desc, &link_data, sizeof(link_data),
-				 cd);
+	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), cd);
 
 	if (status)
 		return status;
 
 	/* save off old link status information */
-	*hw_link_info_old = *hw_link_info;
+	*li_old = *li;
 
 	/* update current link status information */
-	hw_link_info->link_speed = le16_to_cpu(link_data.link_speed);
-	hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low);
-	hw_link_info->phy_type_high = le64_to_cpu(link_data.phy_type_high);
+	li->link_speed = le16_to_cpu(link_data.link_speed);
+	li->phy_type_low = le64_to_cpu(link_data.phy_type_low);
+	li->phy_type_high = le64_to_cpu(link_data.phy_type_high);
 	*hw_media_type = ice_get_media_type(pi);
-	hw_link_info->link_info = link_data.link_info;
-	hw_link_info->an_info = link_data.an_info;
-	hw_link_info->ext_info = link_data.ext_info;
-	hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size);
-	hw_link_info->fec_info = link_data.cfg & ICE_AQ_FEC_MASK;
-	hw_link_info->topo_media_conflict = link_data.topo_media_conflict;
-	hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M;
+	li->link_info = link_data.link_info;
+	li->an_info = link_data.an_info;
+	li->ext_info = link_data.ext_info;
+	li->max_frame_size = le16_to_cpu(link_data.max_frame_size);
+	li->fec_info = link_data.cfg & ICE_AQ_FEC_MASK;
+	li->topo_media_conflict = link_data.topo_media_conflict;
+	li->pacing = link_data.cfg & (ICE_AQ_CFG_PACING_M |
+				      ICE_AQ_CFG_PACING_TYPE_M);
 
 	/* update fc info */
 	tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX);
@@ -320,12 +322,24 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 	else
 		hw_fc_info->current_mode = ICE_FC_NONE;
 
-	hw_link_info->lse_ena =
-		!!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
+	li->lse_ena = !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
+
+	ice_debug(hw, ICE_DBG_LINK, "link_speed = 0x%x\n", li->link_speed);
+	ice_debug(hw, ICE_DBG_LINK, "phy_type_low = 0x%llx\n",
+		  (unsigned long long)li->phy_type_low);
+	ice_debug(hw, ICE_DBG_LINK, "phy_type_high = 0x%llx\n",
+		  (unsigned long long)li->phy_type_high);
+	ice_debug(hw, ICE_DBG_LINK, "media_type = 0x%x\n", *hw_media_type);
+	ice_debug(hw, ICE_DBG_LINK, "link_info = 0x%x\n", li->link_info);
+	ice_debug(hw, ICE_DBG_LINK, "an_info = 0x%x\n", li->an_info);
+	ice_debug(hw, ICE_DBG_LINK, "ext_info = 0x%x\n", li->ext_info);
+	ice_debug(hw, ICE_DBG_LINK, "lse_ena = 0x%x\n", li->lse_ena);
+	ice_debug(hw, ICE_DBG_LINK, "max_frame = 0x%x\n", li->max_frame_size);
+	ice_debug(hw, ICE_DBG_LINK, "pacing = 0x%x\n", li->pacing);
 
 	/* save link status information */
 	if (link)
-		*link = *hw_link_info;
+		*link = *li;
 
 	/* flag cleared so calling functions don't call AQ again */
 	pi->phy.get_link_info = false;
@@ -2000,6 +2014,17 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
 	desc.params.set_phy.lport_num = lport;
 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
 
+	ice_debug(hw, ICE_DBG_LINK, "phy_type_low = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(cfg->phy_type_low));
+	ice_debug(hw, ICE_DBG_LINK, "phy_type_high = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(cfg->phy_type_high));
+	ice_debug(hw, ICE_DBG_LINK, "caps = 0x%x\n", cfg->caps);
+	ice_debug(hw, ICE_DBG_LINK, "low_power_ctrl = 0x%x\n",
+		  cfg->low_power_ctrl);
+	ice_debug(hw, ICE_DBG_LINK, "eee_cap = 0x%x\n", cfg->eee_cap);
+	ice_debug(hw, ICE_DBG_LINK, "eeer_value = 0x%x\n", cfg->eeer_value);
+	ice_debug(hw, ICE_DBG_LINK, "link_fec_opt = 0x%x\n", cfg->link_fec_opt);
+
 	return ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
 }
 
-- 
2.21.0


^ permalink raw reply related

* [net-next 02/15] ice: added sibling head to parse nodes
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Victor Raj, netdev, nhorman, sassmann, Tony Nguyen, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Victor Raj <victor.raj@intel.com>

There was a bug in the previous code which never traverses all the
children to get the first node of the requested layer. Add a sibling
head pointer to point the first node of each layer per TC. This helps
traverse easier and quicker and also removes the recursion.

Signed-off-by: Victor Raj <victor.raj@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_sched.c | 57 ++++++++--------------
 drivers/net/ethernet/intel/ice/ice_type.h  |  2 +
 2 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 2a232504379d..79d64f9ed609 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -260,33 +260,17 @@ ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
 
 /**
  * ice_sched_get_first_node - get the first node of the given layer
- * @hw: pointer to the HW struct
+ * @pi: port information structure
  * @parent: pointer the base node of the subtree
  * @layer: layer number
  *
  * This function retrieves the first node of the given layer from the subtree
  */
 static struct ice_sched_node *
-ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
-			 u8 layer)
+ice_sched_get_first_node(struct ice_port_info *pi,
+			 struct ice_sched_node *parent, u8 layer)
 {
-	u8 i;
-
-	if (layer < hw->sw_entry_point_layer)
-		return NULL;
-	for (i = 0; i < parent->num_children; i++) {
-		struct ice_sched_node *node = parent->children[i];
-
-		if (node) {
-			if (node->tx_sched_layer == layer)
-				return node;
-			/* this recursion is intentional, and wouldn't
-			 * go more than 9 calls
-			 */
-			return ice_sched_get_first_node(hw, node, layer);
-		}
-	}
-	return NULL;
+	return pi->sib_head[parent->tc_num][layer];
 }
 
 /**
@@ -342,7 +326,7 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
 	parent = node->parent;
 	/* root has no parent */
 	if (parent) {
-		struct ice_sched_node *p, *tc_node;
+		struct ice_sched_node *p;
 
 		/* update the parent */
 		for (i = 0; i < parent->num_children; i++)
@@ -354,16 +338,7 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
 				break;
 			}
 
-		/* search for previous sibling that points to this node and
-		 * remove the reference
-		 */
-		tc_node = ice_sched_get_tc_node(pi, node->tc_num);
-		if (!tc_node) {
-			ice_debug(hw, ICE_DBG_SCHED,
-				  "Invalid TC number %d\n", node->tc_num);
-			goto err_exit;
-		}
-		p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
+		p = ice_sched_get_first_node(pi, node, node->tx_sched_layer);
 		while (p) {
 			if (p->sibling == node) {
 				p->sibling = node->sibling;
@@ -371,8 +346,13 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
 			}
 			p = p->sibling;
 		}
+
+		/* update the sibling head if head is getting removed */
+		if (pi->sib_head[node->tc_num][node->tx_sched_layer] == node)
+			pi->sib_head[node->tc_num][node->tx_sched_layer] =
+				node->sibling;
 	}
-err_exit:
+
 	/* leaf nodes have no children */
 	if (node->children)
 		devm_kfree(ice_hw_to_dev(hw), node->children);
@@ -743,13 +723,17 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
 
 		/* add it to previous node sibling pointer */
 		/* Note: siblings are not linked across branches */
-		prev = ice_sched_get_first_node(hw, tc_node, layer);
+		prev = ice_sched_get_first_node(pi, tc_node, layer);
 		if (prev && prev != new_node) {
 			while (prev->sibling)
 				prev = prev->sibling;
 			prev->sibling = new_node;
 		}
 
+		/* initialize the sibling head */
+		if (!pi->sib_head[tc_node->tc_num][layer])
+			pi->sib_head[tc_node->tc_num][layer] = new_node;
+
 		if (i == 0)
 			*first_node_teid = teid;
 	}
@@ -1160,7 +1144,7 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
 		goto lan_q_exit;
 
 	/* get the first queue group node from VSI sub-tree */
-	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
+	qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
 	while (qgrp_node) {
 		/* make sure the qgroup node is part of the VSI subtree */
 		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
@@ -1191,7 +1175,7 @@ ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
 	u8 vsi_layer;
 
 	vsi_layer = ice_sched_get_vsi_layer(hw);
-	node = ice_sched_get_first_node(hw, tc_node, vsi_layer);
+	node = ice_sched_get_first_node(hw->port_info, tc_node, vsi_layer);
 
 	/* Check whether it already exists */
 	while (node) {
@@ -1316,7 +1300,8 @@ ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
 			/* If intermediate nodes are reached max children
 			 * then add a new one.
 			 */
-			node = ice_sched_get_first_node(hw, tc_node, (u8)i);
+			node = ice_sched_get_first_node(hw->port_info, tc_node,
+							(u8)i);
 			/* scan all the siblings */
 			while (node) {
 				if (node->num_children < hw->max_children[i])
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 24bbef8bbe69..d76e0cb7ef46 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -347,6 +347,8 @@ struct ice_port_info {
 	struct ice_mac_info mac;
 	struct ice_phy_info phy;
 	struct mutex sched_lock;	/* protect access to TXSched tree */
+	struct ice_sched_node *
+		sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
 	struct ice_dcbx_cfg local_dcbx_cfg;	/* Oper/Local Cfg */
 	/* DCBX info */
 	struct ice_dcbx_cfg remote_dcbx_cfg;	/* Peer Cfg */
-- 
2.21.0


^ permalink raw reply related

* [net-next 07/15] ice: add validation in OP_CONFIG_VSI_QUEUES VF message
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Michal Swiatkowski, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Michal Swiatkowski <michal.swiatkowski@intel.com>

Check num_queue_pairs to avoid access to unallocated field of
vsi->tx_rings/vsi->rx_rings. Without this validation we can set
vsi->alloc_txq/vsi->alloc_rxq to value smaller than ICE_MAX_BASE_QS_PER_VF
and send this command with num_queue_pairs greater than
vsi->alloc_txq/vsi->alloc_rxq. This lead to access to unallocated memory.

In VF vsi alloc_txq and alloc_rxq should be the same. Get minimum
because looks more readable.

Also add validation for ring_len param. It should be greater than 32 and
be multiple of 32. Incorrect value leads to hang traffic on PF.

Signed-off-by: Michal Swiatkowski <michal.swiatkowski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ice/ice_virtchnl_pf.c  | 31 ++++++++++++++-----
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 86637d99ee77..e6578d2f0876 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1712,6 +1712,19 @@ static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
 	return (vsi && (qid < vsi->alloc_txq));
 }
 
+/**
+ * ice_vc_isvalid_ring_len
+ * @ring_len: length of ring
+ *
+ * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE
+ */
+static bool ice_vc_isvalid_ring_len(u16 ring_len)
+{
+	return (ring_len >= ICE_MIN_NUM_DESC &&
+		ring_len <= ICE_MAX_NUM_DESC &&
+		!(ring_len % ICE_REQ_DESC_MULTIPLE));
+}
+
 /**
  * ice_vc_config_rss_key
  * @vf: pointer to the VF info
@@ -2107,16 +2120,17 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF) {
-		dev_err(&pf->pdev->dev,
-			"VF-%d requesting more than supported number of queues: %d\n",
-			vf->vf_id, qci->num_queue_pairs);
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
-	if (!vsi) {
+	if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF ||
+	    qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+		dev_err(&pf->pdev->dev,
+			"VF-%d requesting more than supported number of queues: %d\n",
+			vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
 	}
@@ -2127,6 +2141,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 		    qpi->rxq.vsi_id != qci->vsi_id ||
 		    qpi->rxq.queue_id != qpi->txq.queue_id ||
 		    qpi->txq.headwb_enabled ||
+		    !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
+		    !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
 		    !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 			goto error_param;
@@ -2137,7 +2153,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 		/* copy Rx queue info from VF into VSI */
 		vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
 		vsi->rx_rings[i]->count = qpi->rxq.ring_len;
-		if (qpi->rxq.databuffer_size > ((16 * 1024) - 128)) {
+		if (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
+		    qpi->rxq.databuffer_size < 1024) {
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 			goto error_param;
 		}
-- 
2.21.0


^ permalink raw reply related

* [net-next 06/15] ice: Don't clog kernel debug log with VF MDD events errors
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Akeem G Abodunrin, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Akeem G Abodunrin <akeem.g.abodunrin@intel.com>

In case of MDD events on VF, don't clog kernel log with unlimited VF MDD
events message "VF 0 has had 1018 MDD events since last boot" - limit
events log message to 30, based on the observation in some experimentation
with sending malicious packet once, and number of events reported before
device stopped observing MDD events.

Also removed defunct macro "ICE_DFLT_NUM_MDD_EVENTS_ALLOWED" for tracking
number of MDD events allowed before disabling the interface...

Signed-off-by: Akeem G Abodunrin <akeem.g.abodunrin@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c        | 6 ++++--
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2c6b2bc4e30e..67cbebe1ff3f 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1315,8 +1315,10 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 
 		if (vf_mdd_detected) {
 			vf->num_mdd_events++;
-			if (vf->num_mdd_events > 1)
-				dev_info(&pf->pdev->dev, "VF %d has had %llu MDD events since last boot\n",
+			if (vf->num_mdd_events &&
+			    vf->num_mdd_events <= ICE_MDD_EVENTS_THRESHOLD)
+				dev_info(&pf->pdev->dev,
+					 "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n",
 					 i, vf->num_mdd_events);
 		}
 	}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 4d94853f119a..13f45f37d75e 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -15,8 +15,8 @@
 #define ICE_MAX_MACADDR_PER_VF		12
 
 /* Malicious Driver Detection */
-#define ICE_DFLT_NUM_MDD_EVENTS_ALLOWED		3
 #define ICE_DFLT_NUM_INVAL_MSGS_ALLOWED		10
+#define ICE_MDD_EVENTS_THRESHOLD		30
 
 /* Static VF transaction/status register def */
 #define VF_DEVICE_STATUS		0xAA
-- 
2.21.0


^ permalink raw reply related

* [net-next 13/15] ice: Fix VF configuration issues due to reset
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Akeem G Abodunrin, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Akeem G Abodunrin <akeem.g.abodunrin@intel.com>

This patch fixes a critical reset issue that resulting to the server
reboot when an Admin changes VF configuration on the host, for example
changing VF to Trusted/non_Trusted mode, the PF driver send reset
notification to AVF driver while also continue with reset flow. However,
AVF driver schedule another reset due to notification, which causes two
concurrent reset going on, and trigger lock up in the FW, with AQ call to
delete VSI.

Signed-off-by: Akeem G Abodunrin <akeem.g.abodunrin@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 78fd3fa8ac8b..b93324e9f4bc 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1152,12 +1152,19 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 	u32 reg;
 	int i;
 
-	/* If the VFs have been disabled, this means something else is
-	 * resetting the VF, so we shouldn't continue.
+	/* If the PF has been disabled, there is no need resetting VF until
+	 * PF is active again.
 	 */
 	if (test_bit(__ICE_VF_DIS, pf->state))
 		return false;
 
+	/* If the VF has been disabled, this means something else is
+	 * resetting the VF, so we shouldn't continue. Otherwise, set
+	 * disable VF state bit for actual reset, and continue.
+	 */
+	if (test_and_set_bit(ICE_VF_STATE_DIS, vf->vf_states))
+		return false;
+
 	ice_trigger_vf_reset(vf, is_vflr);
 
 	vsi = pf->vsi[vf->lan_vsi_idx];
-- 
2.21.0


^ permalink raw reply related

* [net-next 11/15] ice: add support for virtchnl_queue_select.[tx|rx]_queues bitmap
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Paul Greenwalt, netdev, nhorman, sassmann, Peng Huang,
	Tony Nguyen, Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Paul Greenwalt <paul.greenwalt@intel.com>

The VF driver can call VIRTCHNL_OP_[ENABLE|DISABLE]_QUEUES separately
for each queue. Add support for virtchnl_queue_select.[tx|rx]_queues
bitmap which is used to indicate which queues to enable and disable.

Add tracing of VF Tx/Rx per queue enable state to avoid enabling enabled
queues and disabling disabled queues. Add total queues enabled count and
clear ICE_VF_STATE_QS_ENA when count is zero.

Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Signed-off-by: Peng Huang <peng.huang@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c      |  15 +-
 drivers/net/ethernet/intel/ice/ice_lib.h      |  10 +
 drivers/net/ethernet/intel/ice/ice_main.c     |   2 +-
 .../net/ethernet/intel/ice/ice_virtchnl_pf.c  | 243 +++++++++++++-----
 .../net/ethernet/intel/ice/ice_virtchnl_pf.h  |  12 +-
 5 files changed, 207 insertions(+), 75 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index cfd6723de857..fb866be84088 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -196,7 +196,10 @@ static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
  * @ena: start or stop the Rx rings
  * @rxq_idx: Rx queue index
  */
-static int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+#ifndef CONFIG_PCI_IOV
+static
+#endif /* !CONFIG_PCI_IOV */
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
 {
 	int pf_q = vsi->rxq_map[rxq_idx];
 	struct ice_pf *pf = vsi->back;
@@ -2105,7 +2108,10 @@ void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
  * @ring: Tx ring to be stopped
  * @txq_meta: Meta data of Tx ring to be stopped
  */
-static int
+#ifndef CONFIG_PCI_IOV
+static
+#endif /* !CONFIG_PCI_IOV */
+int
 ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 		     u16 rel_vmvf_num, struct ice_ring *ring,
 		     struct ice_txq_meta *txq_meta)
@@ -2165,7 +2171,10 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
  * Set up a helper struct that will contain all the necessary fields that
  * are needed for stopping Tx queue
  */
-static void
+#ifndef CONFIG_PCI_IOV
+static
+#endif /* !CONFIG_PCI_IOV */
+void
 ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
 		  struct ice_txq_meta *txq_meta)
 {
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 33074b8b7557..7faf8db844f6 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -39,6 +39,16 @@ ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
 
 void
 ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta);
+
+void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		       struct ice_txq_meta *txq_meta);
+
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
 #endif /* CONFIG_PCI_IOV */
 
 int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 0398c86226f0..e47aab6d998d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -489,7 +489,7 @@ ice_prepare_for_reset(struct ice_pf *pf)
 
 	/* Disable VFs until reset is completed */
 	for (i = 0; i < pf->num_alloc_vfs; i++)
-		clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states);
+		ice_set_vf_state_qs_dis(&pf->vf[i]);
 
 	/* disable the VSIs and their queues that are not already DOWN */
 	ice_pf_dis_all_vsi(pf, false);
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index e6578d2f0876..78fd3fa8ac8b 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -251,6 +251,35 @@ static int ice_sriov_free_msix_res(struct ice_pf *pf)
 	return 0;
 }
 
+/**
+ * ice_set_vf_state_qs_dis - Set VF queues state to disabled
+ * @vf: pointer to the VF structure
+ */
+void ice_set_vf_state_qs_dis(struct ice_vf *vf)
+{
+	/* Clear Rx/Tx enabled queues flag */
+	bitmap_zero(vf->txq_ena, ICE_MAX_BASE_QS_PER_VF);
+	bitmap_zero(vf->rxq_ena, ICE_MAX_BASE_QS_PER_VF);
+	vf->num_qs_ena = 0;
+	clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+}
+
+/**
+ * ice_dis_vf_qs - Disable the VF queues
+ * @vf: pointer to the VF structure
+ */
+static void ice_dis_vf_qs(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+	ice_vsi_stop_rx_rings(vsi);
+	ice_set_vf_state_qs_dis(vf);
+}
+
 /**
  * ice_free_vfs - Free all VFs
  * @pf: pointer to the PF structure
@@ -267,19 +296,9 @@ void ice_free_vfs(struct ice_pf *pf)
 		usleep_range(1000, 2000);
 
 	/* Avoid wait time by stopping all VFs at the same time */
-	for (i = 0; i < pf->num_alloc_vfs; i++) {
-		struct ice_vsi *vsi;
-
-		if (!test_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states))
-			continue;
-
-		vsi = pf->vsi[pf->vf[i].lan_vsi_idx];
-		/* stop rings without wait time */
-		ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, i);
-		ice_vsi_stop_rx_rings(vsi);
-
-		clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states);
-	}
+	for (i = 0; i < pf->num_alloc_vfs; i++)
+		if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states))
+			ice_dis_vf_qs(&pf->vf[i]);
 
 	/* Disable IOV before freeing resources. This lets any VF drivers
 	 * running in the host get themselves cleaned up before we yank
@@ -1055,17 +1074,9 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	for (v = 0; v < pf->num_alloc_vfs; v++)
 		ice_trigger_vf_reset(&pf->vf[v], is_vflr);
 
-	for (v = 0; v < pf->num_alloc_vfs; v++) {
-		struct ice_vsi *vsi;
-
-		vf = &pf->vf[v];
-		vsi = pf->vsi[vf->lan_vsi_idx];
-		if (test_bit(ICE_VF_STATE_ENA, vf->vf_states)) {
-			ice_vsi_stop_lan_tx_rings(vsi, ICE_VF_RESET, vf->vf_id);
-			ice_vsi_stop_rx_rings(vsi);
-			clear_bit(ICE_VF_STATE_ENA, vf->vf_states);
-		}
-	}
+	for (v = 0; v < pf->num_alloc_vfs; v++)
+		if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[v].vf_states))
+			ice_dis_vf_qs(&pf->vf[v]);
 
 	/* HW requires some time to make sure it can flush the FIFO for a VF
 	 * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in
@@ -1144,24 +1155,21 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 	/* If the VFs have been disabled, this means something else is
 	 * resetting the VF, so we shouldn't continue.
 	 */
-	if (test_and_set_bit(__ICE_VF_DIS, pf->state))
+	if (test_bit(__ICE_VF_DIS, pf->state))
 		return false;
 
 	ice_trigger_vf_reset(vf, is_vflr);
 
 	vsi = pf->vsi[vf->lan_vsi_idx];
 
-	if (test_bit(ICE_VF_STATE_ENA, vf->vf_states)) {
-		ice_vsi_stop_lan_tx_rings(vsi, ICE_VF_RESET, vf->vf_id);
-		ice_vsi_stop_rx_rings(vsi);
-		clear_bit(ICE_VF_STATE_ENA, vf->vf_states);
-	} else {
+	if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states))
+		ice_dis_vf_qs(vf);
+	else
 		/* Call Disable LAN Tx queue AQ call even when queues are not
-		 * enabled. This is needed for successful completiom of VFR
+		 * enabled. This is needed for successful completion of VFR
 		 */
 		ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
 				NULL, ICE_VF_RESET, vf->vf_id, NULL);
-	}
 
 	hw = &pf->hw;
 	/* poll VPGEN_VFRSTAT reg to make sure
@@ -1210,7 +1218,6 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 	ice_cleanup_and_realloc_vf(vf);
 
 	ice_flush(hw);
-	clear_bit(__ICE_VF_DIS, pf->state);
 
 	return true;
 }
@@ -1717,10 +1724,12 @@ static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
  * @ring_len: length of ring
  *
  * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE
+ * or zero
  */
 static bool ice_vc_isvalid_ring_len(u16 ring_len)
 {
-	return (ring_len >= ICE_MIN_NUM_DESC &&
+	return ring_len == 0 ||
+	       (ring_len >= ICE_MIN_NUM_DESC &&
 		ring_len <= ICE_MAX_NUM_DESC &&
 		!(ring_len % ICE_REQ_DESC_MULTIPLE));
 }
@@ -1877,6 +1886,8 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
 	    (struct virtchnl_queue_select *)msg;
 	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
+	unsigned long q_map;
+	u16 vf_q_id;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -1909,12 +1920,48 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
 	 * Tx queue group list was configured and the context bits were
 	 * programmed using ice_vsi_cfg_txqs
 	 */
-	if (ice_vsi_start_rx_rings(vsi))
-		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+	q_map = vqs->rx_queues;
+	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) {
+		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* Skip queue if enabled */
+		if (test_bit(vf_q_id, vf->rxq_ena))
+			continue;
+
+		if (ice_vsi_ctrl_rx_ring(vsi, true, vf_q_id)) {
+			dev_err(&vsi->back->pdev->dev,
+				"Failed to enable Rx ring %d on VSI %d\n",
+				vf_q_id, vsi->vsi_num);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		set_bit(vf_q_id, vf->rxq_ena);
+		vf->num_qs_ena++;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	q_map = vqs->tx_queues;
+	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) {
+		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* Skip queue if enabled */
+		if (test_bit(vf_q_id, vf->txq_ena))
+			continue;
+
+		set_bit(vf_q_id, vf->txq_ena);
+		vf->num_qs_ena++;
+	}
 
 	/* Set flag to indicate that queues are enabled */
 	if (v_ret == VIRTCHNL_STATUS_SUCCESS)
-		set_bit(ICE_VF_STATE_ENA, vf->vf_states);
+		set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
 
 error_param:
 	/* send the response to the VF */
@@ -1937,9 +1984,11 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
 	    (struct virtchnl_queue_select *)msg;
 	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
+	unsigned long q_map;
+	u16 vf_q_id;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) &&
-	    !test_bit(ICE_VF_STATE_ENA, vf->vf_states)) {
+	    !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
 	}
@@ -1966,23 +2015,69 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	if (ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id)) {
-		dev_err(&vsi->back->pdev->dev,
-			"Failed to stop tx rings on VSI %d\n",
-			vsi->vsi_num);
-		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+	if (vqs->tx_queues) {
+		q_map = vqs->tx_queues;
+
+		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) {
+			struct ice_ring *ring = vsi->tx_rings[vf_q_id];
+			struct ice_txq_meta txq_meta = { 0 };
+
+			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Skip queue if not enabled */
+			if (!test_bit(vf_q_id, vf->txq_ena))
+				continue;
+
+			ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+			if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id,
+						 ring, &txq_meta)) {
+				dev_err(&vsi->back->pdev->dev,
+					"Failed to stop Tx ring %d on VSI %d\n",
+					vf_q_id, vsi->vsi_num);
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Clear enabled queues flag */
+			clear_bit(vf_q_id, vf->txq_ena);
+			vf->num_qs_ena--;
+		}
 	}
 
-	if (ice_vsi_stop_rx_rings(vsi)) {
-		dev_err(&vsi->back->pdev->dev,
-			"Failed to stop rx rings on VSI %d\n",
-			vsi->vsi_num);
-		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+	if (vqs->rx_queues) {
+		q_map = vqs->rx_queues;
+
+		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_BASE_QS_PER_VF) {
+			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Skip queue if not enabled */
+			if (!test_bit(vf_q_id, vf->rxq_ena))
+				continue;
+
+			if (ice_vsi_ctrl_rx_ring(vsi, false, vf_q_id)) {
+				dev_err(&vsi->back->pdev->dev,
+					"Failed to stop Rx ring %d on VSI %d\n",
+					vf_q_id, vsi->vsi_num);
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Clear enabled queues flag */
+			clear_bit(vf_q_id, vf->rxq_ena);
+			vf->num_qs_ena--;
+		}
 	}
 
 	/* Clear enabled queues flag */
-	if (v_ret == VIRTCHNL_STATUS_SUCCESS)
-		clear_bit(ICE_VF_STATE_ENA, vf->vf_states);
+	if (v_ret == VIRTCHNL_STATUS_SUCCESS && !vf->num_qs_ena)
+		clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
 
 error_param:
 	/* send the response to the VF */
@@ -2106,6 +2201,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 	struct virtchnl_vsi_queue_config_info *qci =
 	    (struct virtchnl_vsi_queue_config_info *)msg;
 	struct virtchnl_queue_pair_info *qpi;
+	u16 num_rxq = 0, num_txq = 0;
 	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
 	int i;
@@ -2148,33 +2244,44 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 			goto error_param;
 		}
 		/* copy Tx queue info from VF into VSI */
-		vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
-		vsi->tx_rings[i]->count = qpi->txq.ring_len;
-		/* copy Rx queue info from VF into VSI */
-		vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
-		vsi->rx_rings[i]->count = qpi->rxq.ring_len;
-		if (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
-		    qpi->rxq.databuffer_size < 1024) {
-			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
-			goto error_param;
+		if (qpi->txq.ring_len > 0) {
+			num_txq++;
+			vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
+			vsi->tx_rings[i]->count = qpi->txq.ring_len;
 		}
-		vsi->rx_buf_len = qpi->rxq.databuffer_size;
-		if (qpi->rxq.max_pkt_size >= (16 * 1024) ||
-		    qpi->rxq.max_pkt_size < 64) {
-			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
-			goto error_param;
+
+		/* copy Rx queue info from VF into VSI */
+		if (qpi->rxq.ring_len > 0) {
+			num_rxq++;
+			vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
+			vsi->rx_rings[i]->count = qpi->rxq.ring_len;
+
+			if (qpi->rxq.databuffer_size != 0 &&
+			    (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
+			     qpi->rxq.databuffer_size < 1024)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+			vsi->rx_buf_len = qpi->rxq.databuffer_size;
+			vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;
+			if (qpi->rxq.max_pkt_size >= (16 * 1024) ||
+			    qpi->rxq.max_pkt_size < 64) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
 		}
+
 		vsi->max_frame = qpi->rxq.max_pkt_size;
 	}
 
 	/* VF can request to configure less than allocated queues
 	 * or default allocated queues. So update the VSI with new number
 	 */
-	vsi->num_txq = qci->num_queue_pairs;
-	vsi->num_rxq = qci->num_queue_pairs;
+	vsi->num_txq = num_txq;
+	vsi->num_rxq = num_rxq;
 	/* All queues of VF VSI are in TC 0 */
-	vsi->tc_cfg.tc_info[0].qcount_tx = qci->num_queue_pairs;
-	vsi->tc_cfg.tc_info[0].qcount_rx = qci->num_queue_pairs;
+	vsi->tc_cfg.tc_info[0].qcount_tx = num_txq;
+	vsi->tc_cfg.tc_info[0].qcount_rx = num_rxq;
 
 	if (ice_vsi_cfg_lan_txqs(vsi) || ice_vsi_cfg_rxqs(vsi))
 		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 13f45f37d75e..0d9880c8bba3 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -41,9 +41,9 @@
 
 /* Specific VF states */
 enum ice_vf_states {
-	ICE_VF_STATE_INIT = 0,
-	ICE_VF_STATE_ACTIVE,
-	ICE_VF_STATE_ENA,
+	ICE_VF_STATE_INIT = 0,		/* PF is initializing VF */
+	ICE_VF_STATE_ACTIVE,		/* VF resources are allocated for use */
+	ICE_VF_STATE_QS_ENA,		/* VF queue(s) enabled */
 	ICE_VF_STATE_DIS,
 	ICE_VF_STATE_MC_PROMISC,
 	ICE_VF_STATE_UC_PROMISC,
@@ -68,6 +68,8 @@ struct ice_vf {
 	struct virtchnl_version_info vf_ver;
 	u32 driver_caps;		/* reported by VF driver */
 	struct virtchnl_ether_addr dflt_lan_addr;
+	DECLARE_BITMAP(txq_ena, ICE_MAX_BASE_QS_PER_VF);
+	DECLARE_BITMAP(rxq_ena, ICE_MAX_BASE_QS_PER_VF);
 	u16 port_vlan_id;
 	u8 pf_set_mac:1;		/* VF MAC address set by VMM admin */
 	u8 trusted:1;
@@ -90,6 +92,7 @@ struct ice_vf {
 	u16 num_mac;
 	u16 num_vlan;
 	u16 num_vf_qs;			/* num of queue configured per VF */
+	u16 num_qs_ena;			/* total num of Tx/Rx queue enabled */
 };
 
 #ifdef CONFIG_PCI_IOV
@@ -116,12 +119,15 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state);
 int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena);
 
 int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector);
+
+void ice_set_vf_state_qs_dis(struct ice_vf *vf);
 #else /* CONFIG_PCI_IOV */
 #define ice_process_vflr_event(pf) do {} while (0)
 #define ice_free_vfs(pf) do {} while (0)
 #define ice_vc_process_vf_msg(pf, event) do {} while (0)
 #define ice_vc_notify_link_state(pf) do {} while (0)
 #define ice_vc_notify_reset(pf) do {} while (0)
+#define ice_set_vf_state_qs_dis(vf) do {} while (0)
 
 static inline bool
 ice_reset_all_vfs(struct ice_pf __always_unused *pf,
-- 
2.21.0


^ permalink raw reply related

* [net-next 08/15] ice: fix ice_is_tc_ena
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Jacob Keller, netdev, nhorman, sassmann, Tony Nguyen,
	Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

ice_is_tc_ena is used to check whether a given traffic class is
enabled. Because there are only 8 traffic classes, the function took
a u8 bitmap. This causes problems because it is cast to an unsigned
long causing a static analysis warning regarding Out-of-bounds read.

Fix this by simply updating ice_is_tc_ena to take an unsigned long.
Passing a u8 to this function should implicitly convert the value.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_type.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index d76e0cb7ef46..b538d0b9eb80 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -13,9 +13,9 @@
 #define ICE_BYTES_PER_WORD	2
 #define ICE_BYTES_PER_DWORD	4
 
-static inline bool ice_is_tc_ena(u8 bitmap, u8 tc)
+static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
 {
-	return test_bit(tc, (unsigned long *)&bitmap);
+	return test_bit(tc, &bitmap);
 }
 
 /* Driver always calls main vsi_handle first */
-- 
2.21.0


^ permalink raw reply related

* [net-next 10/15] ice: add support for enabling/disabling single queues
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Maciej Fijalkowski, netdev, nhorman, sassmann, Tony Nguyen,
	Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>

Refactor the queue handling functions that are going through queue
arrays in a way that the logic done for a single queue is pulled out and
it will be called for each ring when traversing ring array. This implies
that when disabling Tx rings we won't fill up q_ids, q_teids and
q_handles arrays.  Drop also 'offset' parameter; the value from vsi's
txq_map is stored in ring->reg_idx and that drops the need for mentioned
parameter. Introduce the ice_vsi_cfg_txq, ice_vsi_stop_tx_ring and
ice_vsi_ctrl_rx_ring that are the functions with pulled out logic.

There's several Tx queue meta data (q_id, q_handle, q_teid and other)
that need to be set up during Tx queue disablement, so let's as well add
a helper structure that wraps it up and a function that will be filling
it up.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 342 +++++++++++++----------
 drivers/net/ethernet/intel/ice/ice_lib.h |  18 +-
 2 files changed, 214 insertions(+), 146 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 8d5d6635a123..cfd6723de857 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -191,41 +191,55 @@ static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
 }
 
 /**
- * ice_vsi_ctrl_rx_rings - Start or stop a VSI's Rx rings
+ * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
  * @vsi: the VSI being configured
  * @ena: start or stop the Rx rings
+ * @rxq_idx: Rx queue index
  */
-static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena)
+static int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
 {
+	int pf_q = vsi->rxq_map[rxq_idx];
 	struct ice_pf *pf = vsi->back;
 	struct ice_hw *hw = &pf->hw;
-	int i, ret = 0;
+	int ret = 0;
+	u32 rx_reg;
 
-	for (i = 0; i < vsi->num_rxq; i++) {
-		int pf_q = vsi->rxq_map[i];
-		u32 rx_reg;
+	rx_reg = rd32(hw, QRX_CTRL(pf_q));
 
-		rx_reg = rd32(hw, QRX_CTRL(pf_q));
+	/* Skip if the queue is already in the requested state */
+	if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+		return 0;
 
-		/* Skip if the queue is already in the requested state */
-		if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
-			continue;
+	/* turn on/off the queue */
+	if (ena)
+		rx_reg |= QRX_CTRL_QENA_REQ_M;
+	else
+		rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+	wr32(hw, QRX_CTRL(pf_q), rx_reg);
 
-		/* turn on/off the queue */
-		if (ena)
-			rx_reg |= QRX_CTRL_QENA_REQ_M;
-		else
-			rx_reg &= ~QRX_CTRL_QENA_REQ_M;
-		wr32(hw, QRX_CTRL(pf_q), rx_reg);
-
-		/* wait for the change to finish */
-		ret = ice_pf_rxq_wait(pf, pf_q, ena);
-		if (ret) {
-			dev_err(&pf->pdev->dev,
-				"VSI idx %d Rx ring %d %sable timeout\n",
-				vsi->idx, pf_q, (ena ? "en" : "dis"));
+	/* wait for the change to finish */
+	ret = ice_pf_rxq_wait(pf, pf_q, ena);
+	if (ret)
+		dev_err(&pf->pdev->dev,
+			"VSI idx %d Rx ring %d %sable timeout\n",
+			vsi->idx, pf_q, (ena ? "en" : "dis"));
+
+	return ret;
+}
+
+/**
+ * ice_vsi_ctrl_rx_rings - Start or stop a VSI's Rx rings
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ */
+static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < vsi->num_rxq; i++) {
+		ret = ice_vsi_ctrl_rx_ring(vsi, ena, i);
+		if (ret)
 			break;
-		}
 	}
 
 	return ret;
@@ -1648,6 +1662,62 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
 	return 0;
 }
 
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that queue belongs to
+ * @ring: Tx ring to be configured
+ * @tc_q_idx: queue index within given TC
+ * @qg_buf: queue group buffer
+ * @tc: TC that Tx ring belongs to
+ */
+static int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 tc_q_idx,
+		struct ice_aqc_add_tx_qgrp *qg_buf, u8 tc)
+{
+	struct ice_tlan_ctx tlan_ctx = { 0 };
+	struct ice_aqc_add_txqs_perq *txq;
+	struct ice_pf *pf = vsi->back;
+	u8 buf_len = sizeof(*qg_buf);
+	enum ice_status status;
+	u16 pf_q;
+
+	pf_q = ring->reg_idx;
+	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+	/* copy context contents into the qg_buf */
+	qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+	ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+		    ice_tlan_ctx_info);
+
+	/* init queue specific tail reg. It is referred as
+	 * transmit comm scheduler queue doorbell.
+	 */
+	ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+
+	/* Add unique software queue handle of the Tx queue per
+	 * TC into the VSI Tx ring
+	 */
+	ring->q_handle = tc_q_idx;
+
+	status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
+				 1, qg_buf, buf_len, NULL);
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"Failed to set LAN Tx queue context, error: %d\n",
+			status);
+		return -ENODEV;
+	}
+
+	/* Add Tx Queue TEID into the VSI Tx ring from the
+	 * response. This will complete configuring and
+	 * enabling the queue.
+	 */
+	txq = &qg_buf->txqs[0];
+	if (pf_q == le16_to_cpu(txq->txq_id))
+		ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+	return 0;
+}
+
 /**
  * ice_vsi_cfg_txqs - Configure the VSI for Tx
  * @vsi: the VSI being configured
@@ -1661,20 +1731,16 @@ static int
 ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
 {
 	struct ice_aqc_add_tx_qgrp *qg_buf;
-	struct ice_aqc_add_txqs_perq *txq;
 	struct ice_pf *pf = vsi->back;
-	u8 num_q_grps, q_idx = 0;
-	enum ice_status status;
-	u16 buf_len, i, pf_q;
-	int err = 0, tc;
+	u16 q_idx = 0, i;
+	int err = 0;
+	u8 tc;
 
-	buf_len = sizeof(*qg_buf);
-	qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL);
+	qg_buf = devm_kzalloc(&pf->pdev->dev, sizeof(*qg_buf), GFP_KERNEL);
 	if (!qg_buf)
 		return -ENOMEM;
 
 	qg_buf->num_txqs = 1;
-	num_q_grps = 1;
 
 	/* set up and configure the Tx queues for each enabled TC */
 	ice_for_each_traffic_class(tc) {
@@ -1682,39 +1748,10 @@ ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
 			break;
 
 		for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
-			struct ice_tlan_ctx tlan_ctx = { 0 };
-
-			pf_q = vsi->txq_map[q_idx + offset];
-			ice_setup_tx_ctx(rings[q_idx], &tlan_ctx, pf_q);
-			/* copy context contents into the qg_buf */
-			qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
-			ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
-				    ice_tlan_ctx_info);
-
-			/* init queue specific tail reg. It is referred as
-			 * transmit comm scheduler queue doorbell.
-			 */
-			rings[q_idx]->tail =
-				pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
-			status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc,
-						 i, num_q_grps, qg_buf,
-						 buf_len, NULL);
-			if (status) {
-				dev_err(&pf->pdev->dev,
-					"Failed to set LAN Tx queue context, error: %d\n",
-					status);
-				err = -ENODEV;
+			err = ice_vsi_cfg_txq(vsi, rings[q_idx], i + offset,
+					      qg_buf, tc);
+			if (err)
 				goto err_cfg_txqs;
-			}
-
-			/* Add Tx Queue TEID into the VSI Tx ring from the
-			 * response. This will complete configuring and
-			 * enabling the queue.
-			 */
-			txq = &qg_buf->txqs[0];
-			if (pf_q == le16_to_cpu(txq->txq_id))
-				rings[q_idx]->txq_teid =
-					le32_to_cpu(txq->q_teid);
 
 			q_idx++;
 		}
@@ -2061,45 +2098,106 @@ void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
 }
 
 /**
- * ice_vsi_stop_tx_rings - Disable Tx rings
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
  * @vsi: the VSI being configured
  * @rst_src: reset source
  * @rel_vmvf_num: Relative ID of VF/VM
- * @rings: Tx ring array to be stopped
- * @offset: offset within vsi->txq_map
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
  */
 static int
-ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-		      u16 rel_vmvf_num, struct ice_ring **rings, int offset)
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta)
 {
 	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
 	struct ice_hw *hw = &pf->hw;
-	int tc, q_idx = 0, err = 0;
-	u16 *q_ids, *q_handles, i;
 	enum ice_status status;
-	u32 *q_teids, val;
+	u32 val;
 
-	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
-		return -EINVAL;
+	/* clear cause_ena bit for disabled queues */
+	val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+	val &= ~QINT_TQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_TQCTL(ring->reg_idx), val);
 
-	q_teids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_teids),
-			       GFP_KERNEL);
-	if (!q_teids)
-		return -ENOMEM;
+	/* software is expected to wait for 100 ns */
+	ndelay(100);
 
-	q_ids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_ids),
-			     GFP_KERNEL);
-	if (!q_ids) {
-		err = -ENOMEM;
-		goto err_alloc_q_ids;
+	/* trigger a software interrupt for the vector
+	 * associated to the queue to schedule NAPI handler
+	 */
+	q_vector = ring->q_vector;
+	if (q_vector)
+		ice_trigger_sw_intr(hw, q_vector);
+
+	status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+				 txq_meta->tc, 1, &txq_meta->q_handle,
+				 &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+				 rel_vmvf_num, NULL);
+
+	/* if the disable queue command was exercised during an
+	 * active reset flow, ICE_ERR_RESET_ONGOING is returned.
+	 * This is not an error as the reset operation disables
+	 * queues at the hardware level anyway.
+	 */
+	if (status == ICE_ERR_RESET_ONGOING) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Reset in progress. LAN Tx queues already disabled\n");
+	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"LAN Tx queues do not exist, nothing to disable\n");
+	} else if (status) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to disable LAN Tx queues, error: %d\n", status);
+		return -ENODEV;
 	}
 
-	q_handles = devm_kcalloc(&pf->pdev->dev, vsi->num_txq,
-				 sizeof(*q_handles), GFP_KERNEL);
-	if (!q_handles) {
-		err = -ENOMEM;
-		goto err_alloc_q_handles;
-	}
+	return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct that will contain all the necessary fields that
+ * are needed for stopping Tx queue
+ */
+static void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		  struct ice_txq_meta *txq_meta)
+{
+	u8 tc = 0;
+
+#ifdef CONFIG_DCB
+	tc = ring->dcb_tc;
+#endif /* CONFIG_DCB */
+	txq_meta->q_id = ring->reg_idx;
+	txq_meta->q_teid = ring->txq_teid;
+	txq_meta->q_handle = ring->q_handle;
+	txq_meta->vsi_idx = vsi->idx;
+	txq_meta->tc = tc;
+}
+
+/**
+ * ice_vsi_stop_tx_rings - Disable Tx rings
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @rings: Tx ring array to be stopped
+ */
+static int
+ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		      u16 rel_vmvf_num, struct ice_ring **rings)
+{
+	u16 i, q_idx = 0;
+	int status;
+	u8 tc;
+
+	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
+		return -EINVAL;
 
 	/* set up the Tx queue list to be disabled for each enabled TC */
 	ice_for_each_traffic_class(tc) {
@@ -2107,67 +2205,24 @@ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 			break;
 
 		for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
-			struct ice_q_vector *q_vector;
+			struct ice_txq_meta txq_meta = { };
 
-			if (!rings || !rings[q_idx]) {
-				err = -EINVAL;
-				goto err_out;
-			}
+			if (!rings || !rings[q_idx])
+				return -EINVAL;
 
-			q_ids[i] = vsi->txq_map[q_idx + offset];
-			q_teids[i] = rings[q_idx]->txq_teid;
-			q_handles[i] = i;
+			ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+			status = ice_vsi_stop_tx_ring(vsi, rst_src,
+						      rel_vmvf_num,
+						      rings[q_idx], &txq_meta);
 
-			/* clear cause_ena bit for disabled queues */
-			val = rd32(hw, QINT_TQCTL(rings[i]->reg_idx));
-			val &= ~QINT_TQCTL_CAUSE_ENA_M;
-			wr32(hw, QINT_TQCTL(rings[i]->reg_idx), val);
-
-			/* software is expected to wait for 100 ns */
-			ndelay(100);
-
-			/* trigger a software interrupt for the vector
-			 * associated to the queue to schedule NAPI handler
-			 */
-			q_vector = rings[i]->q_vector;
-			if (q_vector)
-				ice_trigger_sw_intr(hw, q_vector);
+			if (status)
+				return status;
 
 			q_idx++;
 		}
-		status = ice_dis_vsi_txq(vsi->port_info, vsi->idx, tc,
-					 vsi->num_txq, q_handles, q_ids,
-					 q_teids, rst_src, rel_vmvf_num, NULL);
-
-		/* if the disable queue command was exercised during an active
-		 * reset flow, ICE_ERR_RESET_ONGOING is returned. This is not
-		 * an error as the reset operation disables queues at the
-		 * hardware level anyway.
-		 */
-		if (status == ICE_ERR_RESET_ONGOING) {
-			dev_dbg(&pf->pdev->dev,
-				"Reset in progress. LAN Tx queues already disabled\n");
-		} else if (status == ICE_ERR_DOES_NOT_EXIST) {
-			dev_dbg(&pf->pdev->dev,
-				"LAN Tx queues does not exist, nothing to disabled\n");
-		} else if (status) {
-			dev_err(&pf->pdev->dev,
-				"Failed to disable LAN Tx queues, error: %d\n",
-				status);
-			err = -ENODEV;
-		}
 	}
 
-err_out:
-	devm_kfree(&pf->pdev->dev, q_handles);
-
-err_alloc_q_handles:
-	devm_kfree(&pf->pdev->dev, q_ids);
-
-err_alloc_q_ids:
-	devm_kfree(&pf->pdev->dev, q_teids);
-
-	return err;
+	return 0;
 }
 
 /**
@@ -2180,8 +2235,7 @@ int
 ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 			  u16 rel_vmvf_num)
 {
-	return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings,
-				     0);
+	return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 969ba27cba95..33074b8b7557 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -6,8 +6,22 @@
 
 #include "ice.h"
 
-int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
-			const u8 *macaddr);
+struct ice_txq_meta {
+	/* Tx-scheduler element identifier */
+	u32 q_teid;
+	/* Entry in VSI's txq_map bitmap */
+	u16 q_id;
+	/* Relative index of Tx queue within TC */
+	u16 q_handle;
+	/* VSI index that Tx queue belongs to */
+	u16 vsi_idx;
+	/* TC number that Tx queue belongs to */
+	u8 tc;
+};
+
+int
+ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
+		    const u8 *macaddr);
 
 void ice_free_fltr_list(struct device *dev, struct list_head *h);
 
-- 
2.21.0


^ permalink raw reply related

* [net-next 15/15] ice: fix adminq calls during remove
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Henry Tieman, netdev, nhorman, sassmann, Tony Nguyen,
	Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Henry Tieman <henry.w.tieman@intel.com>

The order of operations was incorrect in ice_remove(). The code would
try to use adminq operations after the adminq was disabled. This caused
all adminq calls to fail and possibly timeout waiting.

Signed-off-by: Henry Tieman <henry.w.tieman@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 9371148dc489..f029aee32913 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2633,9 +2633,9 @@ static void ice_remove(struct pci_dev *pdev)
 			continue;
 		ice_vsi_free_q_vectors(pf->vsi[i]);
 	}
-	ice_clear_interrupt_scheme(pf);
 	ice_deinit_pf(pf);
 	ice_deinit_hw(&pf->hw);
+	ice_clear_interrupt_scheme(pf);
 	pci_disable_pcie_error_reporting(pdev);
 }
 
-- 
2.21.0


^ permalink raw reply related

* [net-next 14/15] ice: Rework ice_ena_msix_range
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Anirudh Venkataramanan, netdev, nhorman, sassmann, Tony Nguyen,
	Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>

The current implementation of ice_ena_msix_range is difficult to read
and has subtle issues. This patch reworks the said function for
clarity and correctness.

More specifically,

1. Add more checks to bail out of 'needed' is greater than 'v_left'.

2. Simplify fallback logic

3. Do not set pf->num_avail_sw_msix in ice_ena_msix_range as it
   gets overwritten by ice_init_interrupt_scheme.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 32 +++++++++++++++--------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2499c7ee5038..9371148dc489 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2281,13 +2281,18 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 
 	/* reserve one vector for miscellaneous handler */
 	needed = 1;
+	if (v_left < needed)
+		goto no_hw_vecs_left_err;
 	v_budget += needed;
 	v_left -= needed;
 
 	/* reserve vectors for LAN traffic */
-	pf->num_lan_msix = min_t(int, num_online_cpus(), v_left);
-	v_budget += pf->num_lan_msix;
-	v_left -= pf->num_lan_msix;
+	needed = min_t(int, num_online_cpus(), v_left);
+	if (v_left < needed)
+		goto no_hw_vecs_left_err;
+	pf->num_lan_msix = needed;
+	v_budget += needed;
+	v_left -= needed;
 
 	pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget,
 					sizeof(*pf->msix_entries), GFP_KERNEL);
@@ -2312,18 +2317,18 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 
 	if (v_actual < v_budget) {
 		dev_warn(&pf->pdev->dev,
-			 "not enough vectors. requested = %d, obtained = %d\n",
+			 "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
 			 v_budget, v_actual);
-		if (v_actual >= (pf->num_lan_msix + 1)) {
-			pf->num_avail_sw_msix = v_actual -
-						(pf->num_lan_msix + 1);
-		} else if (v_actual >= 2) {
-			pf->num_lan_msix = 1;
-			pf->num_avail_sw_msix = v_actual - 2;
-		} else {
+/* 2 vectors for LAN (traffic + OICR) */
+#define ICE_MIN_LAN_VECS 2
+
+		if (v_actual < ICE_MIN_LAN_VECS) {
+			/* error if we can't get minimum vectors */
 			pci_disable_msix(pf->pdev);
 			err = -ERANGE;
 			goto msix_err;
+		} else {
+			pf->num_lan_msix = ICE_MIN_LAN_VECS;
 		}
 	}
 
@@ -2333,6 +2338,11 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 	devm_kfree(&pf->pdev->dev, pf->msix_entries);
 	goto exit_err;
 
+no_hw_vecs_left_err:
+	dev_err(&pf->pdev->dev,
+		"not enough device MSI-X vectors. requested = %d, available = %d\n",
+		needed, v_left);
+	err = -ERANGE;
 exit_err:
 	pf->num_lan_msix = 0;
 	return err;
-- 
2.21.0


^ permalink raw reply related

* [net-next 05/15] ice: Introduce a local variable for a VSI in the rebuild path
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Krzysztof Kazimierczak, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>

When a VSI is accessed inside the ice_for_each_vsi macro in the rebuild
path (ice_vsi_rebuild_all() and ice_vsi_replay_all()), it is referred to
as pf->vsi[i]. Introduce local variables to improve readability.

Signed-off-by: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2f6125bfd991..2c6b2bc4e30e 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3704,22 +3704,23 @@ static int ice_vsi_rebuild_all(struct ice_pf *pf)
 
 	/* loop through pf->vsi array and reinit the VSI if found */
 	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
 		int err;
 
-		if (!pf->vsi[i])
+		if (!vsi)
 			continue;
 
-		err = ice_vsi_rebuild(pf->vsi[i]);
+		err = ice_vsi_rebuild(vsi);
 		if (err) {
 			dev_err(&pf->pdev->dev,
 				"VSI at index %d rebuild failed\n",
-				pf->vsi[i]->idx);
+				vsi->idx);
 			return err;
 		}
 
 		dev_info(&pf->pdev->dev,
 			 "VSI at index %d rebuilt. vsi_num = 0x%x\n",
-			 pf->vsi[i]->idx, pf->vsi[i]->vsi_num);
+			 vsi->idx, vsi->vsi_num);
 	}
 
 	return 0;
@@ -3737,25 +3738,27 @@ static int ice_vsi_replay_all(struct ice_pf *pf)
 
 	/* loop through pf->vsi array and replay the VSI if found */
 	ice_for_each_vsi(pf, i) {
-		if (!pf->vsi[i])
+		struct ice_vsi *vsi = pf->vsi[i];
+
+		if (!vsi)
 			continue;
 
-		ret = ice_replay_vsi(hw, pf->vsi[i]->idx);
+		ret = ice_replay_vsi(hw, vsi->idx);
 		if (ret) {
 			dev_err(&pf->pdev->dev,
 				"VSI at index %d replay failed %d\n",
-				pf->vsi[i]->idx, ret);
+				vsi->idx, ret);
 			return -EIO;
 		}
 
 		/* Re-map HW VSI number, using VSI handle that has been
 		 * previously validated in ice_replay_vsi() call above
 		 */
-		pf->vsi[i]->vsi_num = ice_get_hw_vsi_num(hw, pf->vsi[i]->idx);
+		vsi->vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
 
 		dev_info(&pf->pdev->dev,
 			 "VSI at index %d filter replayed successfully - vsi_num %i\n",
-			 pf->vsi[i]->idx, pf->vsi[i]->vsi_num);
+			 vsi->idx, vsi->vsi_num);
 	}
 
 	/* Clean up replay filter after successful re-configuration */
-- 
2.21.0


^ permalink raw reply related

* [net-next 12/15] ice: Alloc queue management bitmaps and arrays dynamically
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Anirudh Venkataramanan, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>

The total number of queues available on the device is divided between
multiple physical functions (PF) in the firmware and provided to the
driver when it gets function capabilities from the firmware. Thus
each PF knows how many Tx/Rx queues it has. These queues are then
doled out to different VSIs (for LAN traffic, SR-IOV VF traffic, etc.)

To track usage of these queues at the PF level, the driver uses two
bitmaps avail_txqs and avail_rxqs. At the VSI level (i.e. struct ice_vsi
instances) the driver uses two arrays txq_map and rxq_map, to track
ownership of VSIs' queues in avail_txqs and avail_rxqs respectively.

The aforementioned bitmaps and arrays should be allocated dynamically,
because the number of queues supported by a PF is only available once
function capabilities have been queried. The current static allocation
consumes way more memory than required.

This patch removes the DECLARE_BITMAP for avail_txqs and avail_rxqs
and instead uses bitmap_zalloc to allocate the bitmaps during init.
Similarly txq_map and rxq_map are now allocated in ice_vsi_alloc_arrays.
As a result ICE_MAX_TXQS and ICE_MAX_RXQS defines are no longer needed.
Also as txq_map and rxq_map are now allocated and freed, some code
reordering was required in ice_vsi_rebuild for correct functioning.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h      | 12 +++---
 drivers/net/ethernet/intel/ice/ice_lib.c  | 45 ++++++++++++++++++-----
 drivers/net/ethernet/intel/ice/ice_main.c | 40 ++++++++++++++++----
 3 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 97d0f61cf52b..fb2bc836b20a 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -73,8 +73,6 @@ extern const char ice_drv_ver[];
 #define ICE_MBXRQ_LEN		512
 #define ICE_MIN_MSIX		2
 #define ICE_NO_VSI		0xffff
-#define ICE_MAX_TXQS		2048
-#define ICE_MAX_RXQS		2048
 #define ICE_VSI_MAP_CONTIG	0
 #define ICE_VSI_MAP_SCATTER	1
 #define ICE_MAX_SCATTER_TXQS	16
@@ -284,8 +282,8 @@ struct ice_vsi {
 	/* queue information */
 	u8 tx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
 	u8 rx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
-	u16 txq_map[ICE_MAX_TXQS];	 /* index in pf->avail_txqs */
-	u16 rxq_map[ICE_MAX_RXQS];	 /* index in pf->avail_rxqs */
+	u16 *txq_map;			 /* index in pf->avail_txqs */
+	u16 *rxq_map;			 /* index in pf->avail_rxqs */
 	u16 alloc_txq;			 /* Allocated Tx queues */
 	u16 num_txq;			 /* Used Tx queues */
 	u16 alloc_rxq;			 /* Allocated Rx queues */
@@ -355,9 +353,9 @@ struct ice_pf {
 	u16 num_vf_qps;			/* num queue pairs per VF */
 	u16 num_vf_msix;		/* num vectors per VF */
 	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
-	DECLARE_BITMAP(avail_txqs, ICE_MAX_TXQS);
-	DECLARE_BITMAP(avail_rxqs, ICE_MAX_RXQS);
 	DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
+	unsigned long *avail_txqs;	/* bitmap to track PF Tx queue usage */
+	unsigned long *avail_rxqs;	/* bitmap to track PF Rx queue usage */
 	unsigned long serv_tmr_period;
 	unsigned long serv_tmr_prev;
 	struct timer_list serv_tmr;
@@ -368,6 +366,8 @@ struct ice_pf {
 	u32 hw_csum_rx_error;
 	u32 oicr_idx;		/* Other interrupt cause MSIX vector index */
 	u32 num_avail_sw_msix;	/* remaining MSIX SW vectors left unclaimed */
+	u16 max_pf_txqs;	/* Total Tx queues PF wide */
+	u16 max_pf_rxqs;	/* Total Rx queues PF wide */
 	u32 num_lan_msix;	/* Total MSIX vectors for base driver */
 	u16 num_lan_tx;		/* num LAN Tx queues setup */
 	u16 num_lan_rx;		/* num LAN Rx queues setup */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index fb866be84088..a39767e8c2a2 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -263,12 +263,24 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
 	vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
 				     sizeof(*vsi->tx_rings), GFP_KERNEL);
 	if (!vsi->tx_rings)
-		goto err_txrings;
+		return -ENOMEM;
 
 	vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
 				     sizeof(*vsi->rx_rings), GFP_KERNEL);
 	if (!vsi->rx_rings)
-		goto err_rxrings;
+		goto err_rings;
+
+	vsi->txq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+				    sizeof(*vsi->txq_map), GFP_KERNEL);
+
+	if (!vsi->txq_map)
+		goto err_txq_map;
+
+	vsi->rxq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+				    sizeof(*vsi->rxq_map), GFP_KERNEL);
+	if (!vsi->rxq_map)
+		goto err_rxq_map;
+
 
 	/* There is no need to allocate q_vectors for a loopback VSI. */
 	if (vsi->type == ICE_VSI_LB)
@@ -283,10 +295,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
 	return 0;
 
 err_vectors:
+	devm_kfree(&pf->pdev->dev, vsi->rxq_map);
+err_rxq_map:
+	devm_kfree(&pf->pdev->dev, vsi->txq_map);
+err_txq_map:
 	devm_kfree(&pf->pdev->dev, vsi->rx_rings);
-err_rxrings:
+err_rings:
 	devm_kfree(&pf->pdev->dev, vsi->tx_rings);
-err_txrings:
 	return -ENOMEM;
 }
 
@@ -433,6 +448,14 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
 		devm_kfree(&pf->pdev->dev, vsi->rx_rings);
 		vsi->rx_rings = NULL;
 	}
+	if (vsi->txq_map) {
+		devm_kfree(&pf->pdev->dev, vsi->txq_map);
+		vsi->txq_map = NULL;
+	}
+	if (vsi->rxq_map) {
+		devm_kfree(&pf->pdev->dev, vsi->rxq_map);
+		vsi->rxq_map = NULL;
+	}
 }
 
 /**
@@ -664,7 +687,7 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi)
 	struct ice_qs_cfg tx_qs_cfg = {
 		.qs_mutex = &pf->avail_q_mutex,
 		.pf_map = pf->avail_txqs,
-		.pf_map_size = ICE_MAX_TXQS,
+		.pf_map_size = pf->max_pf_txqs,
 		.q_count = vsi->alloc_txq,
 		.scatter_count = ICE_MAX_SCATTER_TXQS,
 		.vsi_map = vsi->txq_map,
@@ -674,7 +697,7 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi)
 	struct ice_qs_cfg rx_qs_cfg = {
 		.qs_mutex = &pf->avail_q_mutex,
 		.pf_map = pf->avail_rxqs,
-		.pf_map_size = ICE_MAX_RXQS,
+		.pf_map_size = pf->max_pf_rxqs,
 		.q_count = vsi->alloc_rxq,
 		.scatter_count = ICE_MAX_SCATTER_RXQS,
 		.vsi_map = vsi->rxq_map,
@@ -3018,6 +3041,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 		vsi->base_vector = 0;
 	}
 
+	ice_vsi_put_qs(vsi);
 	ice_vsi_clear_rings(vsi);
 	ice_vsi_free_arrays(vsi);
 	ice_dev_onetime_setup(&pf->hw);
@@ -3025,6 +3049,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 		ice_vsi_set_num_qs(vsi, vf->vf_id);
 	else
 		ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID);
+
+	ret = ice_vsi_alloc_arrays(vsi);
+	if (ret < 0)
+		goto err_vsi;
+
+	ice_vsi_get_qs(vsi);
 	ice_vsi_set_tc_cfg(vsi);
 
 	/* Initialize VSI struct elements and create VSI in FW */
@@ -3032,9 +3062,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 	if (ret < 0)
 		goto err_vsi;
 
-	ret = ice_vsi_alloc_arrays(vsi);
-	if (ret < 0)
-		goto err_vsi;
 
 	switch (vsi->type) {
 	case ICE_VSI_PF:
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index e47aab6d998d..2499c7ee5038 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2207,13 +2207,23 @@ static void ice_deinit_pf(struct ice_pf *pf)
 	ice_service_task_stop(pf);
 	mutex_destroy(&pf->sw_mutex);
 	mutex_destroy(&pf->avail_q_mutex);
+
+	if (pf->avail_txqs) {
+		bitmap_free(pf->avail_txqs);
+		pf->avail_txqs = NULL;
+	}
+
+	if (pf->avail_rxqs) {
+		bitmap_free(pf->avail_rxqs);
+		pf->avail_rxqs = NULL;
+	}
 }
 
 /**
  * ice_init_pf - Initialize general software structures (struct ice_pf)
  * @pf: board private structure to initialize
  */
-static void ice_init_pf(struct ice_pf *pf)
+static int ice_init_pf(struct ice_pf *pf)
 {
 	bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS);
 #ifdef CONFIG_PCI_IOV
@@ -2229,12 +2239,6 @@ static void ice_init_pf(struct ice_pf *pf)
 	mutex_init(&pf->sw_mutex);
 	mutex_init(&pf->avail_q_mutex);
 
-	/* Clear avail_[t|r]x_qs bitmaps (set all to avail) */
-	mutex_lock(&pf->avail_q_mutex);
-	bitmap_zero(pf->avail_txqs, ICE_MAX_TXQS);
-	bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS);
-	mutex_unlock(&pf->avail_q_mutex);
-
 	if (pf->hw.func_caps.common_cap.rss_table_size)
 		set_bit(ICE_FLAG_RSS_ENA, pf->flags);
 
@@ -2243,6 +2247,22 @@ static void ice_init_pf(struct ice_pf *pf)
 	pf->serv_tmr_period = HZ;
 	INIT_WORK(&pf->serv_task, ice_service_task);
 	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+
+	pf->max_pf_txqs = pf->hw.func_caps.common_cap.num_txq;
+	pf->max_pf_rxqs = pf->hw.func_caps.common_cap.num_rxq;
+
+	pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
+	if (!pf->avail_txqs)
+		return -ENOMEM;
+
+	pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
+	if (!pf->avail_rxqs) {
+		devm_kfree(&pf->pdev->dev, pf->avail_txqs);
+		pf->avail_txqs = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 /**
@@ -2467,7 +2487,11 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 		 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
 		 hw->api_maj_ver, hw->api_min_ver);
 
-	ice_init_pf(pf);
+	err = ice_init_pf(pf);
+	if (err) {
+		dev_err(dev, "ice_init_pf failed: %d\n", err);
+		goto err_init_pf_unroll;
+	}
 
 	err = ice_init_pf_dcb(pf, false);
 	if (err) {
-- 
2.21.0


^ permalink raw reply related

* [net-next 01/15] ice: Fix ethtool port and PFC stats for 4x25G cards
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem; +Cc: Usha Ketineni, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Usha Ketineni <usha.k.ketineni@intel.com>

This patch fixes the issue where port and PFC statistics counters are
incrementing at the wrong port with 4x25G cards.
Read the GLPRT port registers using lport parameter instead of pf_id to
update the statistics otherwise the pf_ids are flipped for ports 2 and 3
when read from the HW register PF_FUNC_RID and this is expected as per
hardware specification.

Signed-off-by: Usha Ketineni <usha.k.ketineni@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 13 ++--
 drivers/net/ethernet/intel/ice/ice_main.c    | 76 ++++++++++----------
 2 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 734cef8eed9e..d9578919aad8 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -500,30 +500,31 @@ void ice_update_dcb_stats(struct ice_pf *pf)
 {
 	struct ice_hw_port_stats *prev_ps, *cur_ps;
 	struct ice_hw *hw = &pf->hw;
-	u8 pf_id = hw->pf_id;
+	u8 port;
 	int i;
 
+	port = hw->port_info->lport;
 	prev_ps = &pf->stats_prev;
 	cur_ps = &pf->stats;
 
 	for (i = 0; i < 8; i++) {
-		ice_stat_update32(hw, GLPRT_PXOFFRXC(pf_id, i),
+		ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i),
 				  pf->stat_prev_loaded,
 				  &prev_ps->priority_xoff_rx[i],
 				  &cur_ps->priority_xoff_rx[i]);
-		ice_stat_update32(hw, GLPRT_PXONRXC(pf_id, i),
+		ice_stat_update32(hw, GLPRT_PXONRXC(port, i),
 				  pf->stat_prev_loaded,
 				  &prev_ps->priority_xon_rx[i],
 				  &cur_ps->priority_xon_rx[i]);
-		ice_stat_update32(hw, GLPRT_PXONTXC(pf_id, i),
+		ice_stat_update32(hw, GLPRT_PXONTXC(port, i),
 				  pf->stat_prev_loaded,
 				  &prev_ps->priority_xon_tx[i],
 				  &cur_ps->priority_xon_tx[i]);
-		ice_stat_update32(hw, GLPRT_PXOFFTXC(pf_id, i),
+		ice_stat_update32(hw, GLPRT_PXOFFTXC(port, i),
 				  pf->stat_prev_loaded,
 				  &prev_ps->priority_xoff_tx[i],
 				  &cur_ps->priority_xoff_tx[i]);
-		ice_stat_update32(hw, GLPRT_RXON2OFFCNT(pf_id, i),
+		ice_stat_update32(hw, GLPRT_RXON2OFFCNT(port, i),
 				  pf->stat_prev_loaded,
 				  &prev_ps->priority_xon_2_xoff[i],
 				  &cur_ps->priority_xon_2_xoff[i]);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f3923dec32b7..76a647cc2dbd 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3262,25 +3262,25 @@ void ice_update_pf_stats(struct ice_pf *pf)
 {
 	struct ice_hw_port_stats *prev_ps, *cur_ps;
 	struct ice_hw *hw = &pf->hw;
-	u8 pf_id;
+	u8 port;
 
+	port = hw->port_info->lport;
 	prev_ps = &pf->stats_prev;
 	cur_ps = &pf->stats;
-	pf_id = hw->pf_id;
 
-	ice_stat_update40(hw, GLPRT_GORCL(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded,
 			  &prev_ps->eth.rx_bytes,
 			  &cur_ps->eth.rx_bytes);
 
-	ice_stat_update40(hw, GLPRT_UPRCL(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_UPRCL(port), pf->stat_prev_loaded,
 			  &prev_ps->eth.rx_unicast,
 			  &cur_ps->eth.rx_unicast);
 
-	ice_stat_update40(hw, GLPRT_MPRCL(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_MPRCL(port), pf->stat_prev_loaded,
 			  &prev_ps->eth.rx_multicast,
 			  &cur_ps->eth.rx_multicast);
 
-	ice_stat_update40(hw, GLPRT_BPRCL(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_BPRCL(port), pf->stat_prev_loaded,
 			  &prev_ps->eth.rx_broadcast,
 			  &cur_ps->eth.rx_broadcast);
 
@@ -3288,109 +3288,109 @@ void ice_update_pf_stats(struct ice_pf *pf)
 			  &prev_ps->eth.rx_discards,
 			  &cur_ps->eth.rx_discards);
 
-	ice_stat_update40(hw, GLPRT_GOTCL(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_GOTCL(port), pf->stat_prev_loaded,
 			  &prev_ps->eth.tx_bytes,
 			  &cur_ps->eth.tx_bytes);
 
-	ice_stat_update40(hw, GLPRT_UPTCL(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_UPTCL(port), pf->stat_prev_loaded,
 			  &prev_ps->eth.tx_unicast,
 			  &cur_ps->eth.tx_unicast);
 
-	ice_stat_update40(hw, GLPRT_MPTCL(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_MPTCL(port), pf->stat_prev_loaded,
 			  &prev_ps->eth.tx_multicast,
 			  &cur_ps->eth.tx_multicast);
 
-	ice_stat_update40(hw, GLPRT_BPTCL(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_BPTCL(port), pf->stat_prev_loaded,
 			  &prev_ps->eth.tx_broadcast,
 			  &cur_ps->eth.tx_broadcast);
 
-	ice_stat_update32(hw, GLPRT_TDOLD(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_TDOLD(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_dropped_link_down,
 			  &cur_ps->tx_dropped_link_down);
 
-	ice_stat_update40(hw, GLPRT_PRC64L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC64L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_64, &cur_ps->rx_size_64);
 
-	ice_stat_update40(hw, GLPRT_PRC127L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC127L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_127, &cur_ps->rx_size_127);
 
-	ice_stat_update40(hw, GLPRT_PRC255L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC255L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_255, &cur_ps->rx_size_255);
 
-	ice_stat_update40(hw, GLPRT_PRC511L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC511L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_511, &cur_ps->rx_size_511);
 
-	ice_stat_update40(hw, GLPRT_PRC1023L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC1023L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);
 
-	ice_stat_update40(hw, GLPRT_PRC1522L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC1522L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);
 
-	ice_stat_update40(hw, GLPRT_PRC9522L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PRC9522L(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_size_big, &cur_ps->rx_size_big);
 
-	ice_stat_update40(hw, GLPRT_PTC64L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC64L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_64, &cur_ps->tx_size_64);
 
-	ice_stat_update40(hw, GLPRT_PTC127L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC127L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_127, &cur_ps->tx_size_127);
 
-	ice_stat_update40(hw, GLPRT_PTC255L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC255L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_255, &cur_ps->tx_size_255);
 
-	ice_stat_update40(hw, GLPRT_PTC511L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC511L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_511, &cur_ps->tx_size_511);
 
-	ice_stat_update40(hw, GLPRT_PTC1023L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC1023L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);
 
-	ice_stat_update40(hw, GLPRT_PTC1522L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC1522L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);
 
-	ice_stat_update40(hw, GLPRT_PTC9522L(pf_id), pf->stat_prev_loaded,
+	ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_big, &cur_ps->tx_size_big);
 
-	ice_stat_update32(hw, GLPRT_LXONRXC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded,
 			  &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
 
-	ice_stat_update32(hw, GLPRT_LXOFFRXC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_LXOFFRXC(port), pf->stat_prev_loaded,
 			  &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);
 
-	ice_stat_update32(hw, GLPRT_LXONTXC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_LXONTXC(port), pf->stat_prev_loaded,
 			  &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);
 
-	ice_stat_update32(hw, GLPRT_LXOFFTXC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_LXOFFTXC(port), pf->stat_prev_loaded,
 			  &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
 
 	ice_update_dcb_stats(pf);
 
-	ice_stat_update32(hw, GLPRT_CRCERRS(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_CRCERRS(port), pf->stat_prev_loaded,
 			  &prev_ps->crc_errors, &cur_ps->crc_errors);
 
-	ice_stat_update32(hw, GLPRT_ILLERRC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_ILLERRC(port), pf->stat_prev_loaded,
 			  &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);
 
-	ice_stat_update32(hw, GLPRT_MLFC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_MLFC(port), pf->stat_prev_loaded,
 			  &prev_ps->mac_local_faults,
 			  &cur_ps->mac_local_faults);
 
-	ice_stat_update32(hw, GLPRT_MRFC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_MRFC(port), pf->stat_prev_loaded,
 			  &prev_ps->mac_remote_faults,
 			  &cur_ps->mac_remote_faults);
 
-	ice_stat_update32(hw, GLPRT_RLEC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);
 
-	ice_stat_update32(hw, GLPRT_RUC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_undersize, &cur_ps->rx_undersize);
 
-	ice_stat_update32(hw, GLPRT_RFC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_RFC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_fragments, &cur_ps->rx_fragments);
 
-	ice_stat_update32(hw, GLPRT_ROC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_ROC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_oversize, &cur_ps->rx_oversize);
 
-	ice_stat_update32(hw, GLPRT_RJC(pf_id), pf->stat_prev_loaded,
+	ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_jabber, &cur_ps->rx_jabber);
 
 	pf->stat_prev_loaded = true;
-- 
2.21.0


^ permalink raw reply related

* [net-next 09/15] ice: fix potential infinite loop
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Colin Ian King, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Colin Ian King <colin.king@canonical.com>

The loop counter of a for-loop is a u8 however this is being compared
to an int upper bound and this can lead to an infinite loop if the
upper bound is greater than 255 since the loop counter will wrap back
to zero. Fix this potential issue by making the loop counter an int.

Addresses-Coverity: ("Infinite loop")
Fixes: c7aeb4d1b9bf ("ice: Disable VFs until reset is completed")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 67cbebe1ff3f..0398c86226f0 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -477,7 +477,7 @@ static void
 ice_prepare_for_reset(struct ice_pf *pf)
 {
 	struct ice_hw *hw = &pf->hw;
-	u8 i;
+	int i;
 
 	/* already prepared for reset */
 	if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state))
-- 
2.21.0


^ permalink raw reply related

* [net-next 03/15] ice: Sanitize ice_ena_vsi and ice_dis_vsi
From: Jeff Kirsher @ 2019-08-27 16:38 UTC (permalink / raw)
  To: davem
  Cc: Anirudh Venkataramanan, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190827163832.8362-1-jeffrey.t.kirsher@intel.com>

From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>

1. ndo_open and ndo_stop are implemented by ice_open and ice_stop
   respectively. When enabling/disabling VSIs, just call
   ice_open/ice_stop instead of ndo_open/ndo_stop.

2. Rework logic around rtnl_lock/rtnl_unlock

3. In ice_ena_vsi, remove an unnecessary stack variable and return
   0 instead of err when __ICE_NEEDS_RESTART is not set.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 24 +++++++++++------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 76a647cc2dbd..2f6125bfd991 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -436,13 +436,13 @@ static void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
 
 	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
 		if (netif_running(vsi->netdev)) {
-			if (!locked) {
+			if (!locked)
 				rtnl_lock();
-				vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
+
+			ice_stop(vsi->netdev);
+
+			if (!locked)
 				rtnl_unlock();
-			} else {
-				vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
-			}
 		} else {
 			ice_vsi_close(vsi);
 		}
@@ -3654,21 +3654,19 @@ static int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
 	int err = 0;
 
 	if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
-		return err;
+		return 0;
 
 	clear_bit(__ICE_NEEDS_RESTART, vsi->state);
 
 	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
-		struct net_device *netd = vsi->netdev;
-
 		if (netif_running(vsi->netdev)) {
-			if (locked) {
-				err = netd->netdev_ops->ndo_open(netd);
-			} else {
+			if (!locked)
 				rtnl_lock();
-				err = netd->netdev_ops->ndo_open(netd);
+
+			err = ice_open(vsi->netdev);
+
+			if (!locked)
 				rtnl_unlock();
-			}
 		}
 	}
 
-- 
2.21.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox