Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH bpf-next v4 2/6] bpf: refactor masks for ADJ_ROOM flags and encap validation
From: Nick Hudson @ 2026-04-16  7:55 UTC (permalink / raw)
  To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
  Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Daniel Borkmann,
	Alexei Starovoitov, Andrii Nakryiko, Eduard Zingerman,
	Kumar Kartikeya Dwivedi, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, linux-kernel
In-Reply-To: <20260416075514.927101-1-nhudson@akamai.com>

Refactor the helper masks for bpf_skb_adjust_room() flags to simplify
validation logic and introduce:

- BPF_F_ADJ_ROOM_ENCAP_MASK
- BPF_F_ADJ_ROOM_DECAP_MASK

Refactor existing validation checks in bpf_skb_net_shrink()
and bpf_skb_adjust_room() to use the new masks (no behavior change).

This is in preparation for supporting the new decap flags.

Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
---
 net/core/filter.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 78b548158fb0..4e860da4381d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3490,14 +3490,19 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 #define BPF_F_ADJ_ROOM_DECAP_L3_MASK	(BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
 					 BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
 
-#define BPF_F_ADJ_ROOM_MASK		(BPF_F_ADJ_ROOM_FIXED_GSO | \
-					 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
+#define BPF_F_ADJ_ROOM_ENCAP_MASK	(BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
 					 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
 					 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
 					 BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
 					 BPF_F_ADJ_ROOM_ENCAP_L2( \
-					  BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
-					 BPF_F_ADJ_ROOM_DECAP_L3_MASK)
+					  BPF_ADJ_ROOM_ENCAP_L2_MASK))
+
+#define BPF_F_ADJ_ROOM_DECAP_MASK	(BPF_F_ADJ_ROOM_DECAP_L3_MASK)
+
+#define BPF_F_ADJ_ROOM_MASK		(BPF_F_ADJ_ROOM_FIXED_GSO | \
+					 BPF_F_ADJ_ROOM_ENCAP_MASK | \
+					 BPF_F_ADJ_ROOM_DECAP_MASK | \
+					 BPF_F_ADJ_ROOM_NO_CSUM_RESET)
 
 static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 			    u64 flags)
@@ -3618,8 +3623,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
 {
 	int ret;
 
-	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
-			       BPF_F_ADJ_ROOM_DECAP_L3_MASK |
+	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_DECAP_MASK |
+			       BPF_F_ADJ_ROOM_FIXED_GSO |
 			       BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
 		return -EINVAL;
 
@@ -3715,8 +3720,7 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	u32 off;
 	int ret;
 
-	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
-			       BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
+	if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
 		return -EINVAL;
 	if (unlikely(len_diff_abs > 0xfffU))
 		return -EFAULT;
@@ -3735,20 +3739,20 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 		return -ENOTSUPP;
 	}
 
-	if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+	if (flags & BPF_F_ADJ_ROOM_DECAP_MASK) {
 		if (!shrink)
 			return -EINVAL;
 
-		switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
-		case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
+		/* Reject mutually exclusive decap flag pairs. */
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) ==
+		    BPF_F_ADJ_ROOM_DECAP_L3_MASK)
+			return -EINVAL;
+
+		if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
 			len_min = sizeof(struct iphdr);
-			break;
-		case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
+
+		if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
 			len_min = sizeof(struct ipv6hdr);
-			break;
-		default:
-			return -EINVAL;
-		}
 	}
 
 	len_cur = skb->len - skb_network_offset(skb);
-- 
2.34.1


^ permalink raw reply related

* [PATCH bpf-next v4 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room
From: Nick Hudson @ 2026-04-16  7:55 UTC (permalink / raw)
  To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
  Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Daniel Borkmann,
	Alexei Starovoitov, Andrii Nakryiko, Eduard Zingerman,
	Kumar Kartikeya Dwivedi, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, linux-kernel
In-Reply-To: <20260416075514.927101-1-nhudson@akamai.com>

On shrink in bpf_skb_adjust_room(), clear tunnel-specific GSO flags
according to the decapsulation flags:

- BPF_F_ADJ_ROOM_DECAP_L4_UDP clears SKB_GSO_UDP_TUNNEL{,_CSUM}
- BPF_F_ADJ_ROOM_DECAP_L4_GRE clears SKB_GSO_GRE{,_CSUM}
- BPF_F_ADJ_ROOM_DECAP_IPXIP4 clears SKB_GSO_IPXIP4
- BPF_F_ADJ_ROOM_DECAP_IPXIP6 clears SKB_GSO_IPXIP6

When all tunnel-related GSO bits are cleared, also clear
skb->encapsulation.

Handle the ESP inside a UDP tunnel case where encapsulation should remain
set.

If UDP decap is performed, clear encap_hdr_csum and remcsum_offload.

Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
 net/core/filter.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index 7f8d43420afb..e113ae2f3f14 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3667,6 +3667,44 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
 		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
 			skb_increase_gso_size(shinfo, len_diff);
 
+		/* Selective GSO flag clearing based on decap type.
+		 * Only clear the flags for the tunnel layer being removed.
+		 */
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
+		    (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
+					 SKB_GSO_UDP_TUNNEL_CSUM)))
+			shinfo->gso_type &= ~(SKB_GSO_UDP_TUNNEL |
+					      SKB_GSO_UDP_TUNNEL_CSUM);
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) &&
+		    (shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM)))
+			shinfo->gso_type &= ~(SKB_GSO_GRE |
+					      SKB_GSO_GRE_CSUM);
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) &&
+		    (shinfo->gso_type & SKB_GSO_IPXIP4))
+			shinfo->gso_type &= ~SKB_GSO_IPXIP4;
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) &&
+		    (shinfo->gso_type & SKB_GSO_IPXIP6))
+			shinfo->gso_type &= ~SKB_GSO_IPXIP6;
+
+		/* Clear encapsulation flag only when no tunnel GSO flags remain */
+		if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+			     BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
+			if (!(shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
+						  SKB_GSO_UDP_TUNNEL_CSUM |
+						  SKB_GSO_GRE |
+						  SKB_GSO_GRE_CSUM |
+						  SKB_GSO_IPXIP4 |
+						  SKB_GSO_IPXIP6 |
+						  SKB_GSO_ESP)))
+				if (skb->encapsulation)
+					skb->encapsulation = 0;
+
+			if (flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) {
+				skb->encap_hdr_csum = 0;
+				skb->remcsum_offload = 0;
+			}
+		}
+
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
-- 
2.34.1


^ permalink raw reply related

* Re: [PATCH v12 net-next 00/11] nbl driver for Nebulamatrix NICs
From: Paolo Abeni @ 2026-04-16  8:01 UTC (permalink / raw)
  To: illusion.wang, netdev; +Cc: open list
In-Reply-To: <20260415033608.2438-1-illusion.wang@nebula-matrix.com>

On 4/15/26 10:29 AM, illusion.wang wrote:
> This patch series represents the first phase. We plan to integrate it in
> two phases: the first phase covers mailbox and chip configuration,
> while the second phase involves net dev configuration.
> Together, they will provide basic PF-based Ethernet port transmission and
> reception capabilities.
> 
> After that, we will consider other features, such as ethtool support,
> flow management, adminq messaging, VF support, debugfs support, etc.
> 
> changes v11->v12
> Link to v11:https://lore.kernel.org/netdev/20260408093739.56001-1-illusion.wang@nebula-matrix.com/
> AI review issues
> changes v10->v11
> Link to v10:https://lore.kernel.org/netdev/20260401022318.28550-1-illusion.wang@nebula-matrix.com/
> 1.Issues found by Mohsin
> 2.AI review issues
> changes v9->v10
> Link to v9:https://lore.kernel.org/netdev/20260325040048.2313-1-illusion.wang@nebula-matrix.com/
> 1.Issues found by Jakub
> 2.AI review issue
> changes v8->v9
> Link to v8:https://lore.kernel.org/netdev/20260317034533.5600-1-illusion.wang@nebula-matrix.com/
> 1.Issues found by Jakub
> 2.AI review issue
> Changes v7→v8
> Link to v7:https://lore.kernel.org/netdev/20260310120959.22015-1-illusion.wang@nebula-matrix.com/
> 1.Issues found by Paolo
> Changes v6->v7
> Link to v6:https://lore.kernel.org/netdev/20260306033451.5196-1-illusion.wang@nebula-matrix.com/
> 1.Issue found by Jakub
> 2.AI review issue
> Changes v5->v6
> Link to V5:https://lore.kernel.org/netdev/20260226073840.3222-1-illusion.wang@nebula-matrix.com/
> 1.put all standard linux include files in the .c file which needs them & others
> --Andrew
> 2.AI review issue
> Changes v4->v5
> Link to V4:https://lore.kernel.org/netdev/20260206021608.85381-1-illusion.wang@nebula-matrix.com/
> 1.change nbl_core to nbl & change ** pointers to *pointers & others
> --Andrew
> 2.AI review issue
> Changes v3->v4
> Link to v3: https://lore.kernel.org/netdev/20260123011804.31263-1-illusion.wang@nebula-matrix.com
> 1.cut down to part of a mini driver(mailbox and chip init)
> --Jakub Kicinski Simon Horman(some sort of staged approach)
> 2.modify issues found by ai.
> 3. Reverse Christmas tree/nbl_err/devm_kfree/remove some macros/
> void type to real type/others
> --Andrew Lunn
> 4.change deprecated pci_enable_msix_range to pci_alloc_irq_vectors
> 5.delete service layer
> 6.the style of kconfig---Randy Dunlap
> 7.add to Documentation/networking/device_drivers/ethernet/index.rst
> --Simon Horman
> Changes v2 →v3
> Link to v2: https://lore.kernel.org/netdev/20260109100146.63569-1-illusion.wang@nebula-matrix.com/
> 1.cut down to a mini driver:
>     delete vf support
>     use promisc mode to cut down flow management
>     drop patch15 in v2
>     delete adminq msg
>     delete abnormal handling
>     delete some unimportant interfaces
> 2.modify issues found by ai review
> Changes v1->v2
> Link to v1: https://lore.kernel.org/netdev/20251223035113.31122-1-illusion.wang@nebula-matrix.com/
> 1.Format Issues and Compilation Issues
> - Paolo Abeni
> 2.add sysfs patch and drop coexisting patch
> - Andrew Lunn
> 3.delete some unimportant ndo operations
> 4.add machine generated headers patch
> 5.Modify the issues found in patch1-2 and apply the same fixes to other
> patches
> 6.modify issues found by nipa

## Form letter - net-next-closed

We have already submitted our pull request with net-next material for
v7.1, and therefore net-next is closed for new drivers, features, code
refactoring and optimizations. We are currently accepting bug fixes only.

Please repost when net-next reopens after Apr 26th.

RFC patches sent for review only are obviously welcome at any time.

See:
https://www.kernel.org/doc/html/next/process/maintainer-netdev.html#development-cycle


^ permalink raw reply

* Re: [PATCH net-next v7 0/7] net: bcmgenet: add XDP support
From: Paolo Abeni @ 2026-04-16  8:06 UTC (permalink / raw)
  To: Nicolai Buchwitz, netdev
  Cc: Justin Chen, Simon Horman, Mohsin Bashir, Doug Berger,
	Florian Fainelli, Broadcom internal kernel review list,
	Andrew Lunn, Eric Dumazet, Alexei Starovoitov, Daniel Borkmann,
	David S. Miller, Jakub Kicinski, Jesper Dangaard Brouer,
	John Fastabend, Stanislav Fomichev, bpf
In-Reply-To: <20260416054743.1289191-1-nb@tipi-net.de>

On 4/16/26 7:47 AM, Nicolai Buchwitz wrote:
> Add XDP support to the bcmgenet driver, covering XDP_PASS, XDP_DROP,
> XDP_TX, XDP_REDIRECT, and ndo_xdp_xmit.
> 
> The first patch converts the RX path from the existing kmalloc-based
> allocation to page_pool, which is a prerequisite for XDP. The remaining
> patches incrementally add XDP functionality and per-action statistics.
> 
> Tested on Raspberry Pi CM4 (BCM2711, bcmgenet, 1Gbps link):
> - XDP_PASS: 943 Mbit/s TX, 935 Mbit/s RX (no regression vs baseline)
> - XDP_PASS latency: 0.164ms avg, 0% packet loss
> - XDP_DROP: all inbound traffic blocked as expected
> - XDP_TX: TX counter increments (packet reflection working)
> - Link flap with XDP attached: no errors
> - Program swap under iperf3 load: no errors
> - Upstream XDP selftests (xdp.py): pass_sb, drop_sb, tx_sb passing
> - XDP-based EtherCAT master (~37 kHz cycle rate, all packet processing
>   in BPF/XDP), stable over multiple days

## Form letter - net-next-closed

We have already submitted our pull request with net-next material for
v7.1, and therefore net-next is closed for new drivers, features, code
refactoring and optimizations. We are currently accepting bug fixes only.

Please repost when net-next reopens after Apr 26th.

RFC patches sent for review only are obviously welcome at any time.

See:
https://www.kernel.org/doc/html/next/process/maintainer-netdev.html#development-cycle


^ permalink raw reply

* Re: [PATCHv3] selftests: Use ktap helpers for runner.sh
From: Qingfang Deng @ 2026-04-16  8:07 UTC (permalink / raw)
  To: Hangbin Liu; +Cc: Brendan Jackman, Shuah Khan, linux-kselftest, netdev

Hi, Hangbin

This patch broke selftests run with `make -C tools/testing/selftests` as 
make uses /bin/sh by default:

/bin/sh: 5: 
/home/qf/linux-next/tools/testing/selftests/kselftest/runner.sh: Bad 
substitution

Add `SHELL := /bin/bash` to the start of lib.mk to fix this.

^ permalink raw reply

* Re: [PATCH net 1/3] octeontx2-af: npc: cn20k: Handle npc_mcam_idx_2_key_type() failures
From: Dan Carpenter @ 2026-04-16  8:08 UTC (permalink / raw)
  To: Ratheesh Kannoth
  Cc: netdev, linux-kernel, sgoutham, davem, edumazet, kuba, pabeni,
	andrew+netdev, dan.carpenter
In-Reply-To: <20260416035352.333808-2-rkannoth@marvell.com>

On Thu, Apr 16, 2026 at 09:23:50AM +0530, Ratheesh Kannoth wrote:
> npc_mcam_idx_2_key_type() can fail; ignoring its return value left
> kw_type unchecked in MCAM enable, configure, copy, and read paths.
> Return early on error so we do not program or interpret MCAM state
> with an invalid key type.
> 
> CC: Dan Carpenter <error27@gmail.com>
> Fixes: 6d1e70282f76 ("octeontx2-af: npc: cn20k: Use common APIs")
> Link: https://lore.kernel.org/netdev/adiQJvuKlEhq2ILx@stanley.mountain/
> Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>

Thanks.  That silences the uninitialized variable warning.

regards,
dan carpenter


^ permalink raw reply

* Re: [PATCH net,v2 1/1] net: stmmac: Update default_an_inband before passing value to phylink_config
From: Paolo Abeni @ 2026-04-16  8:12 UTC (permalink / raw)
  To: KhaiWenTan, andrew+netdev, davem, edumazet, kuba, mcoquelin.stm32,
	alexandre.torgue, rmk+kernel, maxime.chevallier, ovidiu.panait.rb,
	vladimir.oltean
  Cc: netdev, linux-stm32, linux-arm-kernel, linux-kernel,
	yoong.siang.song, hong.aun.looi, khai.wen.tan
In-Reply-To: <20260413020339.68426-1-khai.wen.tan@linux.intel.com>

On 4/13/26 4:03 AM, KhaiWenTan wrote:
> get_interfaces() updates both plat->phy_interfaces and
> mdio_bus_data->default_an_inband based on reading a SERDES register.
> Because get_interfaces() is called after default_an_inband has already
> been read, dwmac-intel regressed: phylink_config ends up with an
> incorrect default_an_inband value.
> 
> Therefore, move the priv->plat->get_interfaces() call so that it executes
> before mdio_bus_data->default_an_inband is assigned to
> config->default_an_inband, ensuring default_an_inband holds the correct value.
> 
> Fixes: d3836052fe09 ("net: stmmac: intel: convert speed_mode_2500() to get_interfaces()")
> Signed-off-by: KhaiWenTan <khai.wen.tan@linux.intel.com>

Since Jakub sent the net-next PR and forwarded the trees, this patch
does not apply anymore. Please rebase and repost. You can retain
Russell's reviewed-by tag.

Thanks,

Paolo


^ permalink raw reply

* Re: [PATCH net v2] RDS: Fix memory leak in rds_rdma_extra_size()
From: Paolo Abeni @ 2026-04-16  8:20 UTC (permalink / raw)
  To: Xiaobo Liu, Allison Henderson, David S. Miller
  Cc: Eric Dumazet, Jakub Kicinski, Simon Horman, netdev, linux-rdma,
	rds-devel, linux-kernel
In-Reply-To: <20260413070005.15272-1-cppcoffee@gmail.com>

On 4/13/26 9:00 AM, Xiaobo Liu wrote:
> @@ -595,11 +600,20 @@ int rds_rdma_extra_size(struct rds_rdma_args *args,
>  		 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
>  		 * so tot_pages cannot overflow without first going negative.
>  		 */
> -		if (tot_pages < 0)
> -			return -EINVAL;
> +		if (tot_pages < 0) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
>  	}
>  
> -	return tot_pages * sizeof(struct scatterlist);
> +	ret = tot_pages * sizeof(struct scatterlist);
> +
> +out:
> +	if (ret < 0) {
> +		kfree(iov->iov);
> +		iov->iov = NULL;

Is this really needed?!? AFAICS rds_rdma_extra_size() is invoked only
via: rds_sendmsg() -> rds_rm_size() -> rds_rdma_extra_size(), and the
rds_sendmsg() error path already frees any non NULL iov.

/P


^ permalink raw reply

* Re: [PATCH net,v2 1/1] net: stmmac: Update default_an_inband before passing value to phylink_config
From: KhaiWenTan @ 2026-04-16  8:22 UTC (permalink / raw)
  To: Paolo Abeni, andrew+netdev, davem, edumazet, kuba,
	mcoquelin.stm32, alexandre.torgue, rmk+kernel, maxime.chevallier,
	ovidiu.panait.rb, vladimir.oltean
  Cc: netdev, linux-stm32, linux-arm-kernel, linux-kernel,
	yoong.siang.song, hong.aun.looi, khai.wen.tan
In-Reply-To: <72d1b0b7-c8df-463e-a2d9-bf5ff04ba33c@redhat.com>

On 4/16/2026 4:12 PM, Paolo Abeni wrote:

> On 4/13/26 4:03 AM, KhaiWenTan wrote:
>> get_interfaces() updates both plat->phy_interfaces and
>> mdio_bus_data->default_an_inband based on reading a SERDES register.
>> Because get_interfaces() is called after default_an_inband has already
>> been read, dwmac-intel regressed: phylink_config ends up with an
>> incorrect default_an_inband value.
>>
>> Therefore, move the priv->plat->get_interfaces() call so that it executes
>> before mdio_bus_data->default_an_inband is assigned to
>> config->default_an_inband, ensuring default_an_inband holds the correct value.
>>
>> Fixes: d3836052fe09 ("net: stmmac: intel: convert speed_mode_2500() to get_interfaces()")
>> Signed-off-by: KhaiWenTan <khai.wen.tan@linux.intel.com>
> Since Jakub sent the net-next PR and forwarded the trees, this patch
> does not apply anymore. Please rebase and repost. You can retain
> Russell's reviewed-by tag.
>
> Thanks,
>
> Paolo

Thank you, Paolo. I will rebase the patch and send a v3.


^ permalink raw reply

* RE: [Intel-wired-lan] [PATCH net] ice: fix VF queue configuration with low MTU values
From: Romanowski, Rafal @ 2026-04-16  8:23 UTC (permalink / raw)
  To: Paul Menzel, Jose Ignacio Tornos Martinez
  Cc: intel-wired-lan@lists.osuosl.org, netdev@vger.kernel.org,
	Nguyen, Anthony L, Kitszel, Przemyslaw, Andrew Lunn,
	David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Keller, Jacob E, Loktionov, Aleksandr, Michal Swiatkowski,
	Ertman, David M, Michal Kubiak, stable@vger.kernel.org
In-Reply-To: <22f2d325-fc2a-4801-91b5-b64fac4d86e9@molgen.mpg.de>

> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of Paul
> Menzel
> Sent: Wednesday, April 8, 2026 8:25 AM
> To: Jose Ignacio Tornos Martinez <jtornosm@redhat.com>
> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; Nguyen, Anthony L
> <anthony.l.nguyen@intel.com>; Kitszel, Przemyslaw
> <przemyslaw.kitszel@intel.com>; Andrew Lunn <andrew+netdev@lunn.ch>;
> David S . Miller <davem@davemloft.net>; Eric Dumazet
> <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni
> <pabeni@redhat.com>; Keller, Jacob E <jacob.e.keller@intel.com>; Loktionov,
> Aleksandr <aleksandr.loktionov@intel.com>; Michal Swiatkowski
> <michal.swiatkowski@linux.intel.com>; Ertman, David M
> <david.m.ertman@intel.com>; Michal Kubiak <michal.kubiak@intel.com>;
> stable@vger.kernel.org
> Subject: Re: [Intel-wired-lan] [PATCH net] ice: fix VF queue configuration with low
> MTU values
> 
> Dear Jose,
> 
> 
> Thank you for the patch.
> 
> Am 06.04.26 um 16:56 schrieb Jose Ignacio Tornos Martinez:
> > The ice driver's VF queue configuration validation rejects
> > databuffer_size values below 1024 bytes, which prevents VFs from using
> > MTU values below 871 bytes.
> >
> > The iavf driver calculates databuffer_size based on the MTU using:
> >    databuffer_size = ALIGN(MTU + LIBETH_RX_LL_LEN, 128)
> >
> > where LIBETH_RX_LL_LEN = 26 (ETH_HLEN + 2*VLAN_HLEN + ETH_FCS_LEN).
> >
> > For MTU values below 871:
> >    MTU 870: 870 + 26 = 896, aligned to 128 = 896 (< 1024, rejected)
> >    MTU 871: 871 + 26 = 897, aligned to 128 = 1024 (>= 1024, accepted)
> >
> > The 1024-byte minimum seems unnecessarily restrictive, because the
> > hardware supports databuffer_size as low as 128 bytes (the alignment
> > boundary), which should allow MTU values down to the standard minimum of
> 68 bytes.
> >
> > I haven't found the reason why the limit was configured in the commit
> > 9c7dd7566d18 ("ice: add validation in OP_CONFIG_VSI_QUEUES VF
> > message"), so with no more information and since it is working, change
> > the minimum databuffer_size validation from 1024 to 128 bytes to allow
> > standard low MTU values while still preventing invalid configurations.
> 
> Should you resend, including the reproducer script would be nice.
> 
> > Fixes: 9c7dd7566d18 ("ice: add validation in OP_CONFIG_VSI_QUEUES VF
> > message")
> > cc: stable@vger.kernel.org
> > Signed-off-by: Jose Ignacio Tornos Martinez <jtornosm@redhat.com>
> > ---
> >   drivers/net/ethernet/intel/ice/virt/queues.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/ethernet/intel/ice/virt/queues.c
> > b/drivers/net/ethernet/intel/ice/virt/queues.c
> > index f73d5a3e83d4..31be2f76181c 100644
> > --- a/drivers/net/ethernet/intel/ice/virt/queues.c
> > +++ b/drivers/net/ethernet/intel/ice/virt/queues.c
> > @@ -840,7 +840,7 @@ int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
> >
> >   			if (qpi->rxq.databuffer_size != 0 &&
> >   			    (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
> > -			     qpi->rxq.databuffer_size < 1024))
> > +			     qpi->rxq.databuffer_size < 128))
> >   				goto error_param;
> >
> >   			ring->rx_buf_len = qpi->rxq.databuffer_size;
> 
> Either way:
> 
> Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
> 
> 
> Kind regards,
> 
> Paul


Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>


^ permalink raw reply

* [PATCH bpf-next v4 6/6] selftests/bpf: tc_tunnel validate decap GSO state
From: Nick Hudson @ 2026-04-16  7:55 UTC (permalink / raw)
  To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
  Cc: Nick Hudson, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Eduard Zingerman, Kumar Kartikeya Dwivedi, Shuah Khan,
	linux-kselftest, linux-kernel
In-Reply-To: <20260416075514.927101-1-nhudson@akamai.com>

Require BPF_F_ADJ_ROOM_DECAP_L4_UDP and BPF_F_ADJ_ROOM_DECAP_L4_GRE enum
values at runtime using CO-RE enum existence checks so missing kernel
support fails fast instead of silently proceeding.

After bpf_skb_adjust_room() decapsulation, inspect skb_shared_info and
sk_buff state for GSO packets and assert that the expected tunnel GSO
bits are cleared and encapsulation matches the remaining tunnel state.

Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
 .../selftests/bpf/progs/test_tc_tunnel.c      | 58 +++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 7376df405a6b..74dfb694a210 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -6,6 +6,7 @@
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
 #include "bpf_tracing_net.h"
 #include "bpf_compiler.h"
 
@@ -37,6 +38,23 @@ struct vxlanhdr___local {
 
 #define	EXTPROTO_VXLAN	0x1
 
+#define SKB_GSO_UDP_TUNNEL_MASK	(SKB_GSO_UDP_TUNNEL |			\
+				 SKB_GSO_UDP_TUNNEL_CSUM |		\
+				 SKB_GSO_TUNNEL_REMCSUM)
+
+#define SKB_GSO_TUNNEL_MASK		(SKB_GSO_UDP_TUNNEL_MASK |		\
+				 SKB_GSO_GRE |				\
+				 SKB_GSO_GRE_CSUM |			\
+				 SKB_GSO_IPXIP4 |			\
+				 SKB_GSO_IPXIP6 |			\
+				 SKB_GSO_ESP)
+
+#define BPF_F_ADJ_ROOM_DECAP_L4_MASK	(BPF_F_ADJ_ROOM_DECAP_L4_UDP |	\
+				 BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+
+#define BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK	(BPF_F_ADJ_ROOM_DECAP_IPXIP4 |	\
+					 BPF_F_ADJ_ROOM_DECAP_IPXIP6)
+
 #define	VXLAN_FLAGS     bpf_htonl(1<<27)
 #define	VNI_ID		1
 #define	VXLAN_VNI	bpf_htonl(VNI_ID << 8)
@@ -592,6 +610,8 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 {
 	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
+	struct sk_buff *kskb;
+	struct skb_shared_info *shinfo;
 	struct ipv6_opt_hdr ip6_opt_hdr;
 	struct gre_hdr greh;
 	struct udphdr udph;
@@ -621,6 +641,11 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		break;
 	case IPPROTO_GRE:
 		olen += sizeof(struct gre_hdr);
+		if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+						BPF_F_ADJ_ROOM_DECAP_L4_GRE))
+			return TC_ACT_SHOT;
+		flags |= BPF_F_ADJ_ROOM_DECAP_L4_GRE;
+
 		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
 			return TC_ACT_OK;
 		switch (bpf_ntohs(greh.protocol)) {
@@ -634,6 +659,10 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		break;
 	case IPPROTO_UDP:
 		olen += sizeof(struct udphdr);
+		if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags,
+						BPF_F_ADJ_ROOM_DECAP_L4_UDP))
+			return TC_ACT_SHOT;
+		flags |= BPF_F_ADJ_ROOM_DECAP_L4_UDP;
 		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
 			return TC_ACT_OK;
 		switch (bpf_ntohs(udph.dest)) {
@@ -655,6 +684,35 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
 		return TC_ACT_SHOT;
 
+	kskb = bpf_cast_to_kern_ctx(skb);
+	shinfo = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info);
+	if (!shinfo->gso_size)
+		return TC_ACT_OK;
+
+	if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&
+	    (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_MASK))
+		return TC_ACT_SHOT;
+
+	if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) &&
+	    (shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM)))
+		return TC_ACT_SHOT;
+
+	if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) &&
+	    (shinfo->gso_type & SKB_GSO_IPXIP4))
+		return TC_ACT_SHOT;
+
+	if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) &&
+	    (shinfo->gso_type & SKB_GSO_IPXIP6))
+		return TC_ACT_SHOT;
+
+	if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
+		     BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {
+		if ((shinfo->gso_type & SKB_GSO_TUNNEL_MASK) && !kskb->encapsulation)
+			return TC_ACT_SHOT;
+		if (!(shinfo->gso_type & SKB_GSO_TUNNEL_MASK) && kskb->encapsulation)
+			return TC_ACT_SHOT;
+	}
+
 	return TC_ACT_OK;
 }
 
-- 
2.34.1


^ permalink raw reply related

* [RFC net-next 1/3] ppp: use file.dead to check channel unregistration
From: Qingfang Deng @ 2026-04-16  8:26 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Qingfang Deng, Breno Leitao,
	Sebastian Andrzej Siewior, Kuniyuki Iwashima, Kees Cook,
	linux-ppp, netdev, linux-kernel
  Cc: Paul Mackerras, Jaco Kroon, James Carlson

Currently, ppp_generic checks if pch->chan is NULL to determine if a
channel is being unregistered. However, struct ppp_file already has a
'dead' flag for this purpose, which is used by ppp units and other
parts of the driver.

Switch all pch->chan NULL checks to pch->file.dead checks. In
ppp_unregister_channel, move the setting of pch->file.dead inside the
locked section to ensure atomicity and remove the now redundant
pch->chan = NULL assignment.

This is a preparation to eventually unify 'struct ppp_channel' and
'struct channel' into a single struct.

Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
---
 drivers/net/ppp/ppp_generic.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index b0d3bc49c685..fd2889e374c9 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -790,7 +790,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			down_read(&pch->chan_sem);
 			chan = pch->chan;
 			err = -ENOTTY;
-			if (chan && chan->ops->ioctl)
+			if (!pch->file.dead && chan->ops->ioctl)
 				err = chan->ops->ioctl(chan, cmd, arg);
 			up_read(&pch->chan_sem);
 		}
@@ -2167,7 +2167,7 @@ static void __ppp_channel_push(struct channel *pch, struct ppp *ppp)
 	struct sk_buff *skb;
 
 	spin_lock(&pch->downl);
-	if (pch->chan) {
+	if (!pch->file.dead) {
 		while (!skb_queue_empty(&pch->file.xq)) {
 			skb = skb_dequeue(&pch->file.xq);
 			if (!pch->chan->ops->start_xmit(pch->chan, skb)) {
@@ -2288,7 +2288,7 @@ static bool ppp_channel_bridge_input(struct channel *pch, struct sk_buff *skb)
 		goto out_rcu;
 
 	spin_lock_bh(&pchb->downl);
-	if (!pchb->chan) {
+	if (pchb->file.dead) {
 		/* channel got unregistered */
 		kfree_skb(skb);
 		goto outl;
@@ -3002,7 +3002,7 @@ ppp_unregister_channel(struct ppp_channel *chan)
 	ppp_disconnect_channel(pch);
 	down_write(&pch->chan_sem);
 	spin_lock_bh(&pch->downl);
-	pch->chan = NULL;
+	pch->file.dead = 1;
 	spin_unlock_bh(&pch->downl);
 	up_write(&pch->chan_sem);
 
@@ -3013,7 +3013,6 @@ ppp_unregister_channel(struct ppp_channel *chan)
 
 	ppp_unbridge_channels(pch);
 
-	pch->file.dead = 1;
 	wake_up_interruptible(&pch->file.rwait);
 
 	ppp_release_channel(pch);
@@ -3505,7 +3504,7 @@ ppp_connect_channel(struct channel *pch, int unit)
 
 	ppp_lock(ppp);
 	spin_lock_bh(&pch->downl);
-	if (!pch->chan) {
+	if (pch->file.dead) {
 		/* Don't connect unregistered channels */
 		spin_unlock_bh(&pch->downl);
 		ppp_unlock(ppp);
-- 
2.43.0


^ permalink raw reply related

* [RFC net-next 2/3] ppp: unify two channel structs
From: Qingfang Deng @ 2026-04-16  8:26 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Jiri Kosina, David Sterba, Greg Kroah-Hartman,
	Jiri Slaby, Mitchell Blank Jr, Simon Horman, James Chapman,
	Qingfang Deng, Kees Cook, Yue Haibing, Sebastian Andrzej Siewior,
	Taegu Ha, Kuniyuki Iwashima, Guillaume Nault, Eric Woudstra,
	Arnd Bergmann, Dawid Osuchowski, Breno Leitao, linux-ppp, netdev,
	linux-kernel, linux-serial
  Cc: Paul Mackerras, Jaco Kroon, James Carlson
In-Reply-To: <20260416082656.86963-1-qingfang.deng@linux.dev>

Historically, PPP maintained two separate structures for a channel:
'struct channel' was internal to ppp_generic.c, while 'struct ppp_channel'
was the public interface that drivers were required to embed. This
duplication was redundant and forced drivers to manage the lifecycle of
the public structure.

Unify these two structures into a single 'struct ppp_channel', which is
now internal to ppp_generic.c. Drivers now use a 'ppp_channel_conf'
structure to specify registration parameters and receive an opaque
pointer to the allocated channel.

Key changes:
- ppp_register_channel() and ppp_register_net_channel() now return
  a 'struct ppp_channel *' instead of taking a pointer to a driver-
  embedded structure.
- 'struct ppp_channel_ops' methods now take the driver's 'private'
  pointer directly as their first argument, simplifying driver logic.
- ppp_unregister_channel() now takes the opaque pointer.
- Multilink-specific fields are unified and handled via the new
  configuration structure.

This cleanup simplifies the driver interface and makes the channel
lifecycle management more robust by centralizing allocation in the PPP
generic layer.

Assisted-by: Gemini:gemini-3-flash
Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
---
 drivers/net/ppp/ppp_async.c      |  51 +++++-----
 drivers/net/ppp/ppp_generic.c    | 161 +++++++++++++++----------------
 drivers/net/ppp/ppp_synctty.c    |  51 +++++-----
 drivers/net/ppp/pppoe.c          |  34 ++++---
 drivers/net/ppp/pppox.c          |   4 +-
 drivers/net/ppp/pptp.c           |  40 ++++----
 drivers/tty/ipwireless/network.c |  30 +++---
 include/linux/if_pppox.h         |   2 +-
 include/linux/ppp_channel.h      |  49 ++++++----
 net/atm/pppoatm.c                |  61 ++++++------
 net/l2tp/l2tp_ppp.c              |  34 ++++---
 11 files changed, 271 insertions(+), 246 deletions(-)

diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index 93a7b0f6c4e7..faa299cc3db9 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -67,7 +67,7 @@ struct asyncppp {
 
 	refcount_t	refcnt;
 	struct completion dead;
-	struct ppp_channel chan;	/* interface to generic ppp layer */
+	struct ppp_channel *chan;	/* interface to generic ppp layer */
 	unsigned char	obuf[OBUFSIZE];
 };
 
@@ -95,12 +95,12 @@ MODULE_ALIAS_LDISC(N_PPP);
  * Prototypes.
  */
 static int ppp_async_encode(struct asyncppp *ap);
-static int ppp_async_send(struct ppp_channel *chan, struct sk_buff *skb);
+static int ppp_async_send(void *private, struct sk_buff *skb);
 static int ppp_async_push(struct asyncppp *ap);
 static void ppp_async_flush_output(struct asyncppp *ap);
 static void ppp_async_input(struct asyncppp *ap, const unsigned char *buf,
 			    const u8 *flags, int count);
-static int ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd,
+static int ppp_async_ioctl(void *private, unsigned int cmd,
 			   unsigned long arg);
 static void ppp_async_process(struct tasklet_struct *t);
 
@@ -155,9 +155,10 @@ static void ap_put(struct asyncppp *ap)
 static int
 ppp_asynctty_open(struct tty_struct *tty)
 {
+	struct ppp_channel_conf conf = {};
+	struct ppp_channel *chan;
 	struct asyncppp *ap;
 	int err;
-	int speed;
 
 	if (tty->ops->write == NULL)
 		return -EOPNOTSUPP;
@@ -185,14 +186,18 @@ ppp_asynctty_open(struct tty_struct *tty)
 	refcount_set(&ap->refcnt, 1);
 	init_completion(&ap->dead);
 
-	ap->chan.private = ap;
-	ap->chan.ops = &async_ops;
-	ap->chan.mtu = PPP_MRU;
-	speed = tty_get_baud_rate(tty);
-	ap->chan.speed = speed;
-	err = ppp_register_channel(&ap->chan);
-	if (err)
+	conf.private = ap;
+	conf.ops = &async_ops;
+#ifdef CONFIG_PPP_MULTILINK
+	conf.mtu = PPP_MRU;
+	conf.speed = tty_get_baud_rate(tty);
+#endif
+	chan = ppp_register_channel(&conf);
+	if (!chan) {
+		err = -ENOMEM;
 		goto out_free;
+	}
+	ap->chan = chan;
 
 	tty->disc_data = ap;
 	tty->receive_room = 65536;
@@ -235,7 +240,7 @@ ppp_asynctty_close(struct tty_struct *tty)
 		wait_for_completion(&ap->dead);
 	tasklet_kill(&ap->tsk);
 
-	ppp_unregister_channel(&ap->chan);
+	ppp_unregister_channel(ap->chan);
 	kfree_skb(ap->rpkt);
 	skb_queue_purge(&ap->rqueue);
 	kfree_skb(ap->tpkt);
@@ -293,14 +298,14 @@ ppp_asynctty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
 	switch (cmd) {
 	case PPPIOCGCHAN:
 		err = -EFAULT;
-		if (put_user(ppp_channel_index(&ap->chan), p))
+		if (put_user(ppp_channel_index(ap->chan), p))
 			break;
 		err = 0;
 		break;
 
 	case PPPIOCGUNIT:
 		err = -EFAULT;
-		if (put_user(ppp_unit_number(&ap->chan), p))
+		if (put_user(ppp_unit_number(ap->chan), p))
 			break;
 		err = 0;
 		break;
@@ -391,9 +396,9 @@ ppp_async_init(void)
  * The following routines provide the PPP channel interface.
  */
 static int
-ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg)
+ppp_async_ioctl(void *private, unsigned int cmd, unsigned long arg)
 {
-	struct asyncppp *ap = chan->private;
+	struct asyncppp *ap = private;
 	void __user *argp = (void __user *)arg;
 	int __user *p = argp;
 	int err, val;
@@ -491,13 +496,13 @@ static void ppp_async_process(struct tasklet_struct *t)
 	/* process received packets */
 	while ((skb = skb_dequeue(&ap->rqueue)) != NULL) {
 		if (skb->cb[0])
-			ppp_input_error(&ap->chan);
-		ppp_input(&ap->chan, skb);
+			ppp_input_error(ap->chan);
+		ppp_input(ap->chan, skb);
 	}
 
 	/* try to push more stuff out */
 	if (test_bit(XMIT_WAKEUP, &ap->xmit_flags) && ppp_async_push(ap))
-		ppp_output_wakeup(&ap->chan);
+		ppp_output_wakeup(ap->chan);
 }
 
 /*
@@ -620,9 +625,9 @@ ppp_async_encode(struct asyncppp *ap)
  * at some later time.
  */
 static int
-ppp_async_send(struct ppp_channel *chan, struct sk_buff *skb)
+ppp_async_send(void *private, struct sk_buff *skb)
 {
-	struct asyncppp *ap = chan->private;
+	struct asyncppp *ap = private;
 
 	ppp_async_push(ap);
 
@@ -733,7 +738,7 @@ ppp_async_flush_output(struct asyncppp *ap)
 	}
 	spin_unlock_bh(&ap->xmit_lock);
 	if (done)
-		ppp_output_wakeup(&ap->chan);
+		ppp_output_wakeup(ap->chan);
 }
 
 /*
@@ -992,7 +997,7 @@ static void async_lcp_peek(struct asyncppp *ap, unsigned char *data,
 			if (inbound)
 				ap->mru = val;
 			else
-				ap->chan.mtu = val;
+				ppp_channel_update_mtu(ap->chan, val);
 			break;
 		case LCP_ASYNCMAP:
 			val = get_unaligned_be32(data + 2);
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index fd2889e374c9..882709551bbd 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -106,7 +106,7 @@ struct ppp_file {
 #define PF_TO_X(pf, X)		container_of(pf, X, file)
 
 #define PF_TO_PPP(pf)		PF_TO_X(pf, struct ppp)
-#define PF_TO_CHANNEL(pf)	PF_TO_X(pf, struct channel)
+#define PF_TO_CHANNEL(pf)	PF_TO_X(pf, struct ppp_channel)
 
 struct ppp_xmit_recursion {
 	struct task_struct *owner;
@@ -172,10 +172,11 @@ struct ppp {
  * Private data structure for each channel.
  * This includes the data structure used for multilink.
  */
-struct channel {
+struct ppp_channel {
 	struct ppp_file	file;		/* stuff for read/write/poll */
 	struct list_head list;		/* link in all/new_channels list */
-	struct ppp_channel *chan;	/* public channel data structure */
+	const struct ppp_channel_ops *ops; /* operations for this channel */
+	void *private;			/* channel private data */
 	struct rw_semaphore chan_sem;	/* protects `chan' during chan ioctl */
 	spinlock_t	downl;		/* protects `chan', file.xq dequeue */
 	struct ppp __rcu *ppp;		/* ppp unit we're connected to */
@@ -183,11 +184,13 @@ struct channel {
 	netns_tracker	ns_tracker;
 	struct list_head clist;		/* link in list of channels per unit */
 	spinlock_t	upl;		/* protects `ppp' and 'bridge' */
-	struct channel __rcu *bridge;	/* "bridged" ppp channel */
+	struct ppp_channel __rcu *bridge;	/* "bridged" ppp channel */
+	bool direct_xmit;		/* no qdisc, xmit directly */
 #ifdef CONFIG_PPP_MULTILINK
 	u8		avail;		/* flag used in multilink stuff */
 	u8		had_frag;	/* >= 1 fragments have been sent */
 	u32		lastseq;	/* MP: last sequence # received */
+	int		mtu;		/* max transmit packet size */
 	int		speed;		/* speed of the corresponding ppp channel*/
 #endif /* CONFIG_PPP_MULTILINK */
 };
@@ -265,16 +268,16 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
 static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb);
 static int ppp_prepare_tx_skb(struct ppp *ppp, struct sk_buff **pskb);
 static int ppp_push(struct ppp *ppp, struct sk_buff *skb);
-static void ppp_channel_push(struct channel *pch);
+static void ppp_channel_push(struct ppp_channel *pch);
 static void ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb,
-			      struct channel *pch);
+			      struct ppp_channel *pch);
 static void ppp_receive_error(struct ppp *ppp);
 static void ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb);
 static struct sk_buff *ppp_decompress_frame(struct ppp *ppp,
 					    struct sk_buff *skb);
 #ifdef CONFIG_PPP_MULTILINK
 static void ppp_receive_mp_frame(struct ppp *ppp, struct sk_buff *skb,
-				struct channel *pch);
+				struct ppp_channel *pch);
 static void ppp_mp_insert(struct ppp *ppp, struct sk_buff *skb);
 static struct sk_buff *ppp_mp_reconstruct(struct ppp *ppp);
 static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb);
@@ -288,10 +291,10 @@ static int ppp_create_interface(struct net *net, struct file *file, int *unit);
 static void init_ppp_file(struct ppp_file *pf, int kind);
 static void ppp_release_interface(struct ppp *ppp);
 static struct ppp *ppp_find_unit(struct ppp_net *pn, int unit);
-static struct channel *ppp_find_channel(struct ppp_net *pn, int unit);
-static int ppp_connect_channel(struct channel *pch, int unit);
-static int ppp_disconnect_channel(struct channel *pch);
-static void ppp_release_channel(struct channel *pch);
+static struct ppp_channel *ppp_find_channel(struct ppp_net *pn, int unit);
+static int ppp_connect_channel(struct ppp_channel *pch, int unit);
+static int ppp_disconnect_channel(struct ppp_channel *pch);
+static void ppp_release_channel(struct ppp_channel *pch);
 static int unit_get(struct idr *p, void *ptr, int min);
 static int unit_set(struct idr *p, void *ptr, int n);
 static void unit_put(struct idr *p, int n);
@@ -638,7 +641,7 @@ static struct bpf_prog *compat_ppp_get_filter(struct sock_fprog32 __user *p)
  * Once successfully bridged, each channel holds a reference on the other
  * to prevent it being freed while the bridge is extant.
  */
-static int ppp_bridge_channels(struct channel *pch, struct channel *pchb)
+static int ppp_bridge_channels(struct ppp_channel *pch, struct ppp_channel *pchb)
 {
 	spin_lock(&pch->upl);
 	if (rcu_dereference_protected(pch->ppp, lockdep_is_held(&pch->upl)) ||
@@ -676,9 +679,9 @@ static int ppp_bridge_channels(struct channel *pch, struct channel *pchb)
 	return -EALREADY;
 }
 
-static int ppp_unbridge_channels(struct channel *pch)
+static int ppp_unbridge_channels(struct ppp_channel *pch)
 {
-	struct channel *pchb, *pchbb;
+	struct ppp_channel *pchb, *pchbb;
 
 	spin_lock(&pch->upl);
 	pchb = rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl));
@@ -745,8 +748,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 
 	if (pf->kind == CHANNEL) {
-		struct channel *pch, *pchb;
-		struct ppp_channel *chan;
+		struct ppp_channel *pch, *pchb;
 		struct ppp_net *pn;
 
 		pch = PF_TO_CHANNEL(pf);
@@ -788,10 +790,9 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		default:
 			down_read(&pch->chan_sem);
-			chan = pch->chan;
 			err = -ENOTTY;
-			if (!pch->file.dead && chan->ops->ioctl)
-				err = chan->ops->ioctl(chan, cmd, arg);
+			if (!pch->file.dead && pch->ops->ioctl)
+				err = pch->ops->ioctl(pch->private, cmd, arg);
 			up_read(&pch->chan_sem);
 		}
 		goto out;
@@ -1044,7 +1045,7 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
 {
 	int unit, err = -EFAULT;
 	struct ppp *ppp;
-	struct channel *chan;
+	struct ppp_channel *chan;
 	struct ppp_net *pn;
 	int __user *p = (int __user *)arg;
 
@@ -1586,21 +1587,19 @@ static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
 				 struct net_device_path *path)
 {
 	struct ppp *ppp = netdev_priv(ctx->dev);
-	struct ppp_channel *chan;
-	struct channel *pch;
+	struct ppp_channel *pch;
 
 	if (ppp->flags & SC_MULTILINK)
 		return -EOPNOTSUPP;
 
-	pch = list_first_or_null_rcu(&ppp->channels, struct channel, clist);
+	pch = list_first_or_null_rcu(&ppp->channels, struct ppp_channel, clist);
 	if (!pch)
 		return -ENODEV;
 
-	chan = pch->chan;
-	if (!chan->ops->fill_forward_path)
+	if (!pch->ops->fill_forward_path)
 		return -EOPNOTSUPP;
 
-	return chan->ops->fill_forward_path(ctx, path, chan);
+	return pch->ops->fill_forward_path(ctx, path, pch->private);
 }
 
 static const struct net_device_ops ppp_netdev_ops = {
@@ -1901,7 +1900,6 @@ static int
 ppp_push(struct ppp *ppp, struct sk_buff *skb)
 {
 	struct list_head *list;
-	struct channel *pch;
 
 	list = &ppp->channels;
 	if (list_empty(list)) {
@@ -1911,15 +1909,14 @@ ppp_push(struct ppp *ppp, struct sk_buff *skb)
 	}
 
 	if ((ppp->flags & SC_MULTILINK) == 0) {
-		struct ppp_channel *chan;
+		struct ppp_channel *pch;
 		int ret;
 		/* not doing multilink: send it down the first channel */
 		list = list->next;
-		pch = list_entry(list, struct channel, clist);
+		pch = list_entry(list, struct ppp_channel, clist);
 
 		spin_lock(&pch->downl);
-		chan = pch->chan;
-		if (unlikely(!chan->direct_xmit && skb_linearize(skb))) {
+		if (unlikely(!pch->direct_xmit && skb_linearize(skb))) {
 			/* channel requires a linear skb but linearization
 			 * failed
 			 */
@@ -1928,7 +1925,7 @@ ppp_push(struct ppp *ppp, struct sk_buff *skb)
 			goto out;
 		}
 
-		ret = chan->ops->start_xmit(chan, skb);
+		ret = pch->ops->start_xmit(pch->private, skb);
 
 out:
 		spin_unlock(&pch->downl);
@@ -1967,9 +1964,8 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 	int totfree;
 	unsigned char *p, *q;
 	struct list_head *list;
-	struct channel *pch;
+	struct ppp_channel *pch;
 	struct sk_buff *frag;
-	struct ppp_channel *chan;
 
 	totspeed = 0; /*total bitrate of the bundle*/
 	nfree = 0; /* # channels which have no packet already queued */
@@ -1984,8 +1980,6 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 	list_for_each_entry(pch, &ppp->channels, clist) {
 		pch->avail = 1;
 		navail++;
-		pch->speed = pch->chan->speed;
-
 		if (skb_queue_empty(&pch->file.xq) || !pch->had_frag) {
 			if (pch->speed == 0)
 				nzero++;
@@ -2041,7 +2035,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 			i = 0;
 			continue;
 		}
-		pch = list_entry(list, struct channel, clist);
+		pch = list_entry(list, struct ppp_channel, clist);
 		++i;
 		if (!pch->avail)
 			continue;
@@ -2108,7 +2102,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 		 * MTU counts only the payload excluding the protocol field.
 		 * (RFC1661 Section 2)
 		 */
-		mtu = pch->chan->mtu - (hdrlen - 2);
+		mtu = pch->mtu - (hdrlen - 2);
 		if (mtu < 4)
 			mtu = 4;
 		if (flen > mtu)
@@ -2135,9 +2129,8 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 		memcpy(q + hdrlen, p, flen);
 
 		/* try to send it down the channel */
-		chan = pch->chan;
 		if (!skb_queue_empty(&pch->file.xq) ||
-			!chan->ops->start_xmit(chan, frag))
+			!pch->ops->start_xmit(pch->private, frag))
 			skb_queue_tail(&pch->file.xq, frag);
 		pch->had_frag = 1;
 		p += flen;
@@ -2162,7 +2155,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 #endif /* CONFIG_PPP_MULTILINK */
 
 /* Try to send data out on a channel */
-static void __ppp_channel_push(struct channel *pch, struct ppp *ppp)
+static void __ppp_channel_push(struct ppp_channel *pch, struct ppp *ppp)
 {
 	struct sk_buff *skb;
 
@@ -2170,7 +2163,7 @@ static void __ppp_channel_push(struct channel *pch, struct ppp *ppp)
 	if (!pch->file.dead) {
 		while (!skb_queue_empty(&pch->file.xq)) {
 			skb = skb_dequeue(&pch->file.xq);
-			if (!pch->chan->ops->start_xmit(pch->chan, skb)) {
+			if (!pch->ops->start_xmit(pch->private, skb)) {
 				/* put the packet back and try again later */
 				skb_queue_head(&pch->file.xq, skb);
 				break;
@@ -2192,7 +2185,7 @@ static void __ppp_channel_push(struct channel *pch, struct ppp *ppp)
 	}
 }
 
-static void ppp_channel_push(struct channel *pch)
+static void ppp_channel_push(struct ppp_channel *pch)
 {
 	struct ppp_xmit_recursion *xmit_recursion;
 	struct ppp *ppp;
@@ -2223,7 +2216,7 @@ struct ppp_mp_skb_parm {
 #define PPP_MP_CB(skb)	((struct ppp_mp_skb_parm *)((skb)->cb))
 
 static inline void
-ppp_do_recv(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
+ppp_do_recv(struct ppp *ppp, struct sk_buff *skb, struct ppp_channel *pch)
 {
 	ppp_recv_lock(ppp);
 	if (!ppp->closing)
@@ -2278,9 +2271,9 @@ static bool ppp_decompress_proto(struct sk_buff *skb)
  * If not, the caller must handle the frame by normal recv mechanisms.
  * Returns true if the frame is consumed, false otherwise.
  */
-static bool ppp_channel_bridge_input(struct channel *pch, struct sk_buff *skb)
+static bool ppp_channel_bridge_input(struct ppp_channel *pch, struct sk_buff *skb)
 {
-	struct channel *pchb;
+	struct ppp_channel *pchb;
 
 	rcu_read_lock();
 	pchb = rcu_dereference(pch->bridge);
@@ -2295,7 +2288,7 @@ static bool ppp_channel_bridge_input(struct channel *pch, struct sk_buff *skb)
 	}
 
 	skb_scrub_packet(skb, !net_eq(pch->chan_net, pchb->chan_net));
-	if (!pchb->chan->ops->start_xmit(pchb->chan, skb))
+	if (!pchb->ops->start_xmit(pchb->private, skb))
 		kfree_skb(skb);
 
 outl:
@@ -2308,9 +2301,8 @@ static bool ppp_channel_bridge_input(struct channel *pch, struct sk_buff *skb)
 }
 
 void
-ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
+ppp_input(struct ppp_channel *pch, struct sk_buff *skb)
 {
-	struct channel *pch = chan->ppp;
 	struct ppp *ppp;
 	int proto;
 
@@ -2352,9 +2344,8 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
 }
 
 void
-ppp_input_error(struct ppp_channel *chan)
+ppp_input_error(struct ppp_channel *pch)
 {
-	struct channel *pch = chan->ppp;
 	struct ppp *ppp;
 
 	if (!pch)
@@ -2375,7 +2366,7 @@ ppp_input_error(struct ppp_channel *chan)
  * The receive side of the ppp unit is locked.
  */
 static void
-ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
+ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb, struct ppp_channel *pch)
 {
 	skb_checksum_complete_unset(skb);
 #ifdef CONFIG_PPP_MULTILINK
@@ -2611,10 +2602,10 @@ ppp_decompress_frame(struct ppp *ppp, struct sk_buff *skb)
  * as many completed frames as we can.
  */
 static void
-ppp_receive_mp_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
+ppp_receive_mp_frame(struct ppp *ppp, struct sk_buff *skb, struct ppp_channel *pch)
 {
 	u32 mask, seq;
-	struct channel *ch;
+	struct ppp_channel *ch;
 	int mphdrlen = (ppp->flags & SC_MP_SHORTSEQ)? MPHDRLEN_SSN: MPHDRLEN;
 
 	if (!pskb_may_pull(skb, mphdrlen + 1) || ppp->mrru == 0)
@@ -2885,6 +2876,13 @@ ppp_mp_reconstruct(struct ppp *ppp)
 
 	return skb;
 }
+
+/* Update the MTU of a multilink channel */
+void ppp_channel_update_mtu(struct ppp_channel *pch, int mtu)
+{
+	pch->mtu = mtu;
+}
+EXPORT_SYMBOL(ppp_channel_update_mtu);
 #endif /* CONFIG_PPP_MULTILINK */
 
 /*
@@ -2892,29 +2890,33 @@ ppp_mp_reconstruct(struct ppp *ppp)
  */
 
 /* Create a new, unattached ppp channel. */
-int ppp_register_channel(struct ppp_channel *chan)
+struct ppp_channel *ppp_register_channel(const struct ppp_channel_conf *conf)
 {
-	return ppp_register_net_channel(current->nsproxy->net_ns, chan);
+	return ppp_register_net_channel(current->nsproxy->net_ns, conf);
 }
 
 /* Create a new, unattached ppp channel for specified net. */
-int ppp_register_net_channel(struct net *net, struct ppp_channel *chan)
+struct ppp_channel *ppp_register_net_channel(struct net *net,
+					     const struct ppp_channel_conf *conf)
 {
-	struct channel *pch;
+	struct ppp_channel *pch;
 	struct ppp_net *pn;
 
-	pch = kzalloc_obj(struct channel);
+	pch = kzalloc_obj(struct ppp_channel);
 	if (!pch)
-		return -ENOMEM;
+		return NULL;
 
 	pn = ppp_pernet(net);
 
-	pch->chan = chan;
 	pch->chan_net = get_net_track(net, &pch->ns_tracker, GFP_KERNEL);
-	chan->ppp = pch;
 	init_ppp_file(&pch->file, CHANNEL);
-	pch->file.hdrlen = chan->hdrlen;
+	pch->file.hdrlen = conf->hdrlen;
+	pch->ops = conf->ops;
+	pch->private = conf->private;
+	pch->direct_xmit = conf->direct_xmit;
 #ifdef CONFIG_PPP_MULTILINK
+	pch->speed = conf->speed;
+	pch->mtu = conf->mtu;
 	pch->lastseq = -1;
 #endif /* CONFIG_PPP_MULTILINK */
 	init_rwsem(&pch->chan_sem);
@@ -2927,16 +2929,14 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan)
 	atomic_inc(&channel_count);
 	spin_unlock_bh(&pn->all_channels_lock);
 
-	return 0;
+	return pch;
 }
 
 /*
  * Return the index of a channel.
  */
-int ppp_channel_index(struct ppp_channel *chan)
+int ppp_channel_index(struct ppp_channel *pch)
 {
-	struct channel *pch = chan->ppp;
-
 	if (pch)
 		return pch->file.index;
 	return -1;
@@ -2945,9 +2945,8 @@ int ppp_channel_index(struct ppp_channel *chan)
 /*
  * Return the PPP unit number to which a channel is connected.
  */
-int ppp_unit_number(struct ppp_channel *chan)
+int ppp_unit_number(struct ppp_channel *pch)
 {
-	struct channel *pch = chan->ppp;
 	struct ppp *ppp;
 	int unit = -1;
 
@@ -2965,9 +2964,8 @@ int ppp_unit_number(struct ppp_channel *chan)
  * Return the PPP device interface name of a channel.
  * Caller must hold RCU read lock.
  */
-char *ppp_dev_name(struct ppp_channel *chan)
+char *ppp_dev_name(struct ppp_channel *pch)
 {
-	struct channel *pch = chan->ppp;
 	char *name = NULL;
 	struct ppp *ppp;
 
@@ -2985,16 +2983,13 @@ char *ppp_dev_name(struct ppp_channel *chan)
  * This must be called in process context.
  */
 void
-ppp_unregister_channel(struct ppp_channel *chan)
+ppp_unregister_channel(struct ppp_channel *pch)
 {
-	struct channel *pch = chan->ppp;
 	struct ppp_net *pn;
 
 	if (!pch)
 		return;		/* should never happen */
 
-	chan->ppp = NULL;
-
 	/*
 	 * This ensures that we have returned from any calls into
 	 * the channel's start_xmit or ioctl routine before we proceed.
@@ -3023,10 +3018,8 @@ ppp_unregister_channel(struct ppp_channel *chan)
  * This should be called at BH/softirq level, not interrupt level.
  */
 void
-ppp_output_wakeup(struct ppp_channel *chan)
+ppp_output_wakeup(struct ppp_channel *pch)
 {
-	struct channel *pch = chan->ppp;
-
 	if (!pch)
 		return;
 	ppp_channel_push(pch);
@@ -3459,10 +3452,10 @@ ppp_find_unit(struct ppp_net *pn, int unit)
  * we move it to the all_channels list.  This is for speed
  * when we have a lot of channels in use.
  */
-static struct channel *
+static struct ppp_channel *
 ppp_find_channel(struct ppp_net *pn, int unit)
 {
-	struct channel *pch;
+	struct ppp_channel *pch;
 
 	list_for_each_entry(pch, &pn->new_channels, list) {
 		if (pch->file.index == unit) {
@@ -3483,7 +3476,7 @@ ppp_find_channel(struct ppp_net *pn, int unit)
  * Connect a PPP channel to a PPP interface unit.
  */
 static int
-ppp_connect_channel(struct channel *pch, int unit)
+ppp_connect_channel(struct ppp_channel *pch, int unit)
 {
 	struct ppp *ppp;
 	struct ppp_net *pn;
@@ -3511,7 +3504,7 @@ ppp_connect_channel(struct channel *pch, int unit)
 		ret = -ENOTCONN;
 		goto outl;
 	}
-	if (pch->chan->direct_xmit)
+	if (pch->direct_xmit)
 		ppp->dev->priv_flags |= IFF_NO_QUEUE;
 	else
 		ppp->dev->priv_flags &= ~IFF_NO_QUEUE;
@@ -3539,7 +3532,7 @@ ppp_connect_channel(struct channel *pch, int unit)
  * Disconnect a channel from its ppp unit.
  */
 static int
-ppp_disconnect_channel(struct channel *pch)
+ppp_disconnect_channel(struct ppp_channel *pch)
 {
 	struct ppp *ppp;
 	int err = -EINVAL;
@@ -3565,7 +3558,7 @@ ppp_disconnect_channel(struct channel *pch)
  * Drop a reference to a ppp channel and free its memory if the refcount reaches
  * zero.
  */
-static void ppp_release_channel(struct channel *pch)
+static void ppp_release_channel(struct ppp_channel *pch)
 {
 	if (!refcount_dec_and_test(&pch->file.refcnt))
 		return;
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index b7f243b416f8..d84c267f4da1 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -71,7 +71,7 @@ struct syncppp {
 
 	refcount_t	refcnt;
 	struct completion dead_cmp;
-	struct ppp_channel chan;	/* interface to generic ppp layer */
+	struct ppp_channel *chan;	/* interface to generic ppp layer */
 };
 
 /* Bit numbers in xmit_flags */
@@ -87,8 +87,8 @@ struct syncppp {
  * Prototypes.
  */
 static struct sk_buff* ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *);
-static int ppp_sync_send(struct ppp_channel *chan, struct sk_buff *skb);
-static int ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd,
+static int ppp_sync_send(void *private, struct sk_buff *skb);
+static int ppp_sync_ioctl(void *private, unsigned int cmd,
 			  unsigned long arg);
 static void ppp_sync_process(struct tasklet_struct *t);
 static int ppp_sync_push(struct syncppp *ap);
@@ -155,9 +155,10 @@ static void sp_put(struct syncppp *ap)
 static int
 ppp_sync_open(struct tty_struct *tty)
 {
+	struct ppp_channel_conf conf = {};
+	struct ppp_channel *chan;
 	struct syncppp *ap;
 	int err;
-	int speed;
 
 	if (tty->ops->write == NULL)
 		return -EOPNOTSUPP;
@@ -182,15 +183,19 @@ ppp_sync_open(struct tty_struct *tty)
 	refcount_set(&ap->refcnt, 1);
 	init_completion(&ap->dead_cmp);
 
-	ap->chan.private = ap;
-	ap->chan.ops = &sync_ops;
-	ap->chan.mtu = PPP_MRU;
-	ap->chan.hdrlen = 2;	/* for A/C bytes */
-	speed = tty_get_baud_rate(tty);
-	ap->chan.speed = speed;
-	err = ppp_register_channel(&ap->chan);
-	if (err)
+	conf.private = ap;
+	conf.ops = &sync_ops;
+	conf.hdrlen = 2;	/* for A/C bytes */
+#ifdef CONFIG_PPP_MULTILINK
+	conf.mtu = PPP_MRU;
+	conf.speed = tty_get_baud_rate(tty);
+#endif
+	chan = ppp_register_channel(&conf);
+	if (!chan) {
+		err = -ENOMEM;
 		goto out_free;
+	}
+	ap->chan = chan;
 
 	tty->disc_data = ap;
 	tty->receive_room = 65536;
@@ -233,7 +238,7 @@ ppp_sync_close(struct tty_struct *tty)
 		wait_for_completion(&ap->dead_cmp);
 	tasklet_kill(&ap->tsk);
 
-	ppp_unregister_channel(&ap->chan);
+	ppp_unregister_channel(ap->chan);
 	skb_queue_purge(&ap->rqueue);
 	kfree_skb(ap->tpkt);
 	kfree(ap);
@@ -285,14 +290,14 @@ ppp_synctty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
 	switch (cmd) {
 	case PPPIOCGCHAN:
 		err = -EFAULT;
-		if (put_user(ppp_channel_index(&ap->chan), p))
+		if (put_user(ppp_channel_index(ap->chan), p))
 			break;
 		err = 0;
 		break;
 
 	case PPPIOCGUNIT:
 		err = -EFAULT;
-		if (put_user(ppp_unit_number(&ap->chan), p))
+		if (put_user(ppp_unit_number(ap->chan), p))
 			break;
 		err = 0;
 		break;
@@ -383,9 +388,9 @@ ppp_sync_init(void)
  * The following routines provide the PPP channel interface.
  */
 static int
-ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg)
+ppp_sync_ioctl(void *private, unsigned int cmd, unsigned long arg)
 {
-	struct syncppp *ap = chan->private;
+	struct syncppp *ap = private;
 	int err, val;
 	u32 accm[8];
 	void __user *argp = (void __user *)arg;
@@ -483,16 +488,16 @@ static void ppp_sync_process(struct tasklet_struct *t)
 	while ((skb = skb_dequeue(&ap->rqueue)) != NULL) {
 		if (skb->len == 0) {
 			/* zero length buffers indicate error */
-			ppp_input_error(&ap->chan);
+			ppp_input_error(ap->chan);
 			kfree_skb(skb);
 		}
 		else
-			ppp_input(&ap->chan, skb);
+			ppp_input(ap->chan, skb);
 	}
 
 	/* try to push more stuff out */
 	if (test_bit(XMIT_WAKEUP, &ap->xmit_flags) && ppp_sync_push(ap))
-		ppp_output_wakeup(&ap->chan);
+		ppp_output_wakeup(ap->chan);
 }
 
 /*
@@ -562,9 +567,9 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb)
  * at some later time.
  */
 static int
-ppp_sync_send(struct ppp_channel *chan, struct sk_buff *skb)
+ppp_sync_send(void *private, struct sk_buff *skb)
 {
-	struct syncppp *ap = chan->private;
+	struct syncppp *ap = private;
 
 	ppp_sync_push(ap);
 
@@ -649,7 +654,7 @@ ppp_sync_flush_output(struct syncppp *ap)
 	}
 	spin_unlock_bh(&ap->xmit_lock);
 	if (done)
-		ppp_output_wakeup(&ap->chan);
+		ppp_output_wakeup(ap->chan);
 }
 
 /*
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index d546a7af0d54..47d36d071775 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -357,7 +357,7 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 	 */
 
 	if (sk->sk_state & PPPOX_BOUND) {
-		ppp_input(&po->chan, skb);
+		ppp_input(po->chan, skb);
 	} else {
 		if (sock_queue_rcv_skb(sk, skb))
 			goto abort_kfree;
@@ -625,7 +625,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr_unsized *uservaddr
 
 		po->pppoe_ifindex = 0;
 		memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa));
-		memset(&po->chan, 0, sizeof(po->chan));
+		po->chan = NULL;
 		po->next = NULL;
 		po->num = 0;
 
@@ -634,6 +634,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr_unsized *uservaddr
 
 	/* Re-bind in session stage only */
 	if (stage_session(sp->sa_addr.pppoe.sid)) {
+		struct ppp_channel_conf conf = {};
+		struct ppp_channel *chan;
+
 		error = -ENODEV;
 		net = sock_net(sk);
 		dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev);
@@ -657,20 +660,23 @@ static int pppoe_connect(struct socket *sock, struct sockaddr_unsized *uservaddr
 		if (error < 0)
 			goto err_put;
 
-		po->chan.hdrlen = (sizeof(struct pppoe_hdr) +
+		conf.hdrlen = (sizeof(struct pppoe_hdr) +
 				   dev->hard_header_len);
+#ifdef CONFIG_PPP_MULTILINK
+		conf.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2;
+#endif
+		conf.private = sk;
+		conf.ops = &pppoe_chan_ops;
+		conf.direct_xmit = true;
 
-		po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2;
-		po->chan.private = sk;
-		po->chan.ops = &pppoe_chan_ops;
-		po->chan.direct_xmit = true;
-
-		error = ppp_register_net_channel(dev_net(dev), &po->chan);
-		if (error) {
+		chan = ppp_register_net_channel(dev_net(dev), &conf);
+		if (!chan) {
+			error = -ENOMEM;
 			delete_item(pn, po->pppoe_pa.sid,
 				    po->pppoe_pa.remote, po->pppoe_ifindex);
 			goto err_put;
 		}
+		po->chan = chan;
 
 		sk->sk_state = PPPOX_CONNECTED;
 	}
@@ -891,17 +897,17 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
  * sends PPP frame over PPPoE socket
  *
  ***********************************************************************/
-static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+static int pppoe_xmit(void *private, struct sk_buff *skb)
 {
-	struct sock *sk = chan->private;
+	struct sock *sk = private;
 	return __pppoe_xmit(sk, skb);
 }
 
 static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
 				   struct net_device_path *path,
-				   const struct ppp_channel *chan)
+				   void *private)
 {
-	struct sock *sk = chan->private;
+	struct sock *sk = private;
 	struct pppox_sock *po = pppox_sk(sk);
 	struct net_device *dev = po->pppoe_dev;
 
diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c
index 5861a2f6ce3e..df4fb23a926d 100644
--- a/drivers/net/ppp/pppox.c
+++ b/drivers/net/ppp/pppox.c
@@ -55,7 +55,7 @@ void pppox_unbind_sock(struct sock *sk)
 	/* Clear connection to ppp device, if attached. */
 
 	if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED)) {
-		ppp_unregister_channel(&pppox_sk(sk)->chan);
+		ppp_unregister_channel(pppox_sk(sk)->chan);
 		sk->sk_state = PPPOX_DEAD;
 	}
 }
@@ -80,7 +80,7 @@ int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			break;
 
 		rc = -EINVAL;
-		index = ppp_channel_index(&po->chan);
+		index = ppp_channel_index(po->chan);
 		if (put_user(index , (int __user *) arg))
 			break;
 
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index cc8c102122d8..e49abbe63bf1 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -146,9 +146,9 @@ static struct rtable *pptp_route_output(const struct pppox_sock *po,
 	return ip_route_output_flow(net, fl4, sk);
 }
 
-static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+static int pptp_xmit(void *private, struct sk_buff *skb)
 {
-	struct sock *sk = chan->private;
+	struct sock *sk = private;
 	struct pppox_sock *po = pppox_sk(sk);
 	struct net *net = sock_net(sk);
 	struct pptp_opt *opt = &po->proto.pptp;
@@ -338,7 +338,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb)
 
 		skb->ip_summed = CHECKSUM_NONE;
 		skb_set_network_header(skb, skb->head-skb->data);
-		ppp_input(&po->chan, skb);
+		ppp_input(po->chan, skb);
 
 		return NET_RX_SUCCESS;
 	}
@@ -422,6 +422,8 @@ static int pptp_connect(struct socket *sock, struct sockaddr_unsized *uservaddr,
 	struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr;
 	struct pppox_sock *po = pppox_sk(sk);
 	struct pptp_opt *opt = &po->proto.pptp;
+	struct ppp_channel_conf conf = {};
+	struct ppp_channel *chan;
 	struct rtable *rt;
 	struct flowi4 fl4;
 	int error = 0;
@@ -453,9 +455,6 @@ static int pptp_connect(struct socket *sock, struct sockaddr_unsized *uservaddr,
 		goto end;
 	}
 
-	po->chan.private = sk;
-	po->chan.ops = &pptp_chan_ops;
-
 	rt = pptp_route_output(po, &fl4);
 	if (IS_ERR(rt)) {
 		error = -EHOSTUNREACH;
@@ -463,18 +462,23 @@ static int pptp_connect(struct socket *sock, struct sockaddr_unsized *uservaddr,
 	}
 	sk_setup_caps(sk, &rt->dst);
 
-	po->chan.mtu = dst_mtu(&rt->dst);
-	if (!po->chan.mtu)
-		po->chan.mtu = PPP_MRU;
-	po->chan.mtu -= PPTP_HEADER_OVERHEAD;
-
-	po->chan.hdrlen = 2 + sizeof(struct pptp_gre_header);
-	po->chan.direct_xmit = true;
-	error = ppp_register_channel(&po->chan);
-	if (error) {
+	conf.private = sk;
+	conf.ops = &pptp_chan_ops;
+	conf.hdrlen = 2 + sizeof(struct pptp_gre_header);
+	conf.direct_xmit = true;
+#ifdef CONFIG_PPP_MULTILINK
+	conf.mtu = dst_mtu(&rt->dst);
+	if (!conf.mtu)
+		conf.mtu = PPP_MRU;
+	conf.mtu -= PPTP_HEADER_OVERHEAD;
+#endif
+	chan = ppp_register_channel(&conf);
+	if (!chan) {
+		error = -ENOMEM;
 		pr_err("PPTP: failed to register PPP channel (%d)\n", error);
 		goto end;
 	}
+	po->chan = chan;
 
 	opt->dst_addr = sp->sa_addr.pptp;
 	sk->sk_state |= PPPOX_CONNECTED;
@@ -577,10 +581,10 @@ static int pptp_create(struct net *net, struct socket *sock, int kern)
 	return error;
 }
 
-static int pptp_ppp_ioctl(struct ppp_channel *chan, unsigned int cmd,
-	unsigned long arg)
+static int pptp_ppp_ioctl(void *private, unsigned int cmd,
+			  unsigned long arg)
 {
-	struct sock *sk = chan->private;
+	struct sock *sk = private;
 	struct pppox_sock *po = pppox_sk(sk);
 	struct pptp_opt *opt = &po->proto.pptp;
 	void __user *argp = (void __user *)arg;
diff --git a/drivers/tty/ipwireless/network.c b/drivers/tty/ipwireless/network.c
index ad2c5157a018..7ac5a2d02d44 100644
--- a/drivers/tty/ipwireless/network.c
+++ b/drivers/tty/ipwireless/network.c
@@ -88,10 +88,10 @@ static void notify_packet_sent(void *callback_data, unsigned int packet_length)
 /*
  * Called by the ppp system when it has a packet to send to the hardware.
  */
-static int ipwireless_ppp_start_xmit(struct ppp_channel *ppp_channel,
+static int ipwireless_ppp_start_xmit(void *private,
 				     struct sk_buff *skb)
 {
-	struct ipw_network *network = ppp_channel->private;
+	struct ipw_network *network = private;
 	unsigned long flags;
 
 	spin_lock_irqsave(&network->lock, flags);
@@ -153,10 +153,10 @@ static int ipwireless_ppp_start_xmit(struct ppp_channel *ppp_channel,
 }
 
 /* Handle an ioctl call that has come in via ppp. (copy of ppp_async_ioctl() */
-static int ipwireless_ppp_ioctl(struct ppp_channel *ppp_channel,
+static int ipwireless_ppp_ioctl(void *private,
 				unsigned int cmd, unsigned long arg)
 {
-	struct ipw_network *network = ppp_channel->private;
+	struct ipw_network *network = private;
 	int err, val;
 	u32 accm[8];
 	int __user *user_arg = (int __user *) arg;
@@ -254,19 +254,17 @@ static void do_go_online(struct work_struct *work_go_online)
 
 	spin_lock_irqsave(&network->lock, flags);
 	if (!network->ppp_channel) {
+		struct ppp_channel_conf conf = {};
 		struct ppp_channel *channel;
 
 		spin_unlock_irqrestore(&network->lock, flags);
-		channel = kzalloc_obj(struct ppp_channel);
-		if (!channel) {
-			printk(KERN_ERR IPWIRELESS_PCCARD_NAME
-					": unable to allocate PPP channel\n");
-			return;
-		}
-		channel->private = network;
-		channel->mtu = 16384;	/* Wild guess */
-		channel->hdrlen = 2;
-		channel->ops = &ipwireless_ppp_channel_ops;
+
+		conf.private = network;
+		conf.hdrlen = 2;
+		conf.ops = &ipwireless_ppp_channel_ops;
+#ifdef CONFIG_PPP_MULTILINK
+		conf.mtu = 16384;	/* Wild guess */
+#endif
 
 		network->flags = 0;
 		network->rbits = 0;
@@ -275,10 +273,10 @@ static void do_go_online(struct work_struct *work_go_online)
 		network->xaccm[0] = ~0U;
 		network->xaccm[3] = 0x60000000U;
 		network->raccm = ~0U;
-		if (ppp_register_channel(channel) < 0) {
+		channel = ppp_register_channel(&conf);
+		if (!channel) {
 			printk(KERN_ERR IPWIRELESS_PCCARD_NAME
 					": unable to register PPP channel\n");
-			kfree(channel);
 			return;
 		}
 		spin_lock_irqsave(&network->lock, flags);
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 594d6dc3f4c9..a1d7c11182ec 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -40,7 +40,7 @@ struct pptp_opt {
 struct pppox_sock {
 	/* struct sock must be the first member of pppox_sock */
 	struct sock sk;
-	struct ppp_channel chan;
+	struct ppp_channel *chan;
 	struct pppox_sock __rcu	*next;	  /* for hash table */
 	union {
 		struct pppoe_opt pppoe;
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index 2f63e9a6cc88..e3a3d59d40dc 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -27,55 +27,64 @@ struct ppp_channel;
 struct ppp_channel_ops {
 	/* Send a packet (or multilink fragment) on this channel.
 	   Returns 1 if it was accepted, 0 if not. */
-	int	(*start_xmit)(struct ppp_channel *, struct sk_buff *);
+	int	(*start_xmit)(void *private, struct sk_buff *skb);
 	/* Handle an ioctl call that has come in via /dev/ppp. */
-	int	(*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
-	int	(*fill_forward_path)(struct net_device_path_ctx *,
-				     struct net_device_path *,
-				     const struct ppp_channel *);
+	int	(*ioctl)(void *private, unsigned int cmd, unsigned long arg);
+	int	(*fill_forward_path)(struct net_device_path_ctx *ctx,
+				     struct net_device_path *path,
+				     void *private);
 };
 
-struct ppp_channel {
+struct ppp_channel_conf {
 	void		*private;	/* channel private data */
 	const struct ppp_channel_ops *ops; /* operations for this channel */
-	int		mtu;		/* max transmit packet size */
 	int		hdrlen;		/* amount of headroom channel needs */
-	void		*ppp;		/* opaque to channel */
-	int		speed;		/* transfer rate (bytes/second) */
 	bool		direct_xmit;	/* no qdisc, xmit directly */
+#ifdef CONFIG_PPP_MULTILINK
+	int		speed;		/* transfer rate (bytes/second) */
+	int		mtu;		/* max transmit packet size */
+#endif
 };
 
 #ifdef __KERNEL__
 /* Called by the channel when it can send some more data. */
-extern void ppp_output_wakeup(struct ppp_channel *);
+void ppp_output_wakeup(struct ppp_channel *pch);
 
 /* Called by the channel to process a received PPP packet.
    The packet should have just the 2-byte PPP protocol header. */
-extern void ppp_input(struct ppp_channel *, struct sk_buff *);
+void ppp_input(struct ppp_channel *pch, struct sk_buff *skb);
 
 /* Called by the channel when an input error occurs, indicating
    that we may have missed a packet. */
-extern void ppp_input_error(struct ppp_channel *);
+void ppp_input_error(struct ppp_channel *pch);
 
-/* Attach a channel to a given PPP unit in specified net. */
-extern int ppp_register_net_channel(struct net *, struct ppp_channel *);
+/* Create a new, unattached ppp channel for specified net. */
+struct ppp_channel *ppp_register_net_channel(struct net *net,
+					 const struct ppp_channel_conf *chan);
 
-/* Attach a channel to a given PPP unit. */
-extern int ppp_register_channel(struct ppp_channel *);
+/* Create a new, unattached ppp channel. */
+struct ppp_channel *ppp_register_channel(const struct ppp_channel_conf *chan);
 
 /* Detach a channel from its PPP unit (e.g. on hangup). */
-extern void ppp_unregister_channel(struct ppp_channel *);
+void ppp_unregister_channel(struct ppp_channel *pch);
 
 /* Get the channel number for a channel */
-extern int ppp_channel_index(struct ppp_channel *);
+int ppp_channel_index(struct ppp_channel *pch);
 
 /* Get the unit number associated with a channel, or -1 if none */
-extern int ppp_unit_number(struct ppp_channel *);
+int ppp_unit_number(struct ppp_channel *pch);
 
 /* Get the device name associated with a channel, or NULL if none.
  * Caller must hold RCU read lock.
  */
-extern char *ppp_dev_name(struct ppp_channel *);
+char *ppp_dev_name(struct ppp_channel *pch);
+
+/* Update the MTU of a multilink channel */
+#ifdef CONFIG_PPP_MULTILINK
+void ppp_channel_update_mtu(struct ppp_channel *pch, int mtu);
+#else
+static inline void ppp_channel_update_mtu(struct ppp_channel *pch, int mtu) {}
+#endif
 
 /*
  * SMP locking notes:
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index e3c422dc533a..d801233700e7 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -64,7 +64,7 @@ struct pppoatm_vcc {
 	atomic_t inflight;
 	unsigned long blocked;
 	int flags;			/* SC_COMP_PROT - compress protocol */
-	struct ppp_channel chan;	/* interface to generic ppp layer */
+	struct ppp_channel *chan;	/* interface to generic ppp layer */
 	struct tasklet_struct wakeup_tasklet;
 };
 
@@ -91,11 +91,6 @@ static inline struct pppoatm_vcc *atmvcc_to_pvcc(const struct atm_vcc *atmvcc)
 	return (struct pppoatm_vcc *) (atmvcc->user_back);
 }
 
-static inline struct pppoatm_vcc *chan_to_pvcc(const struct ppp_channel *chan)
-{
-	return (struct pppoatm_vcc *) (chan->private);
-}
-
 /*
  * We can't do this directly from our _pop handler, since the ppp code
  * doesn't want to be called in interrupt context, so we do it from
@@ -105,7 +100,7 @@ static void pppoatm_wakeup_sender(struct tasklet_struct *t)
 {
 	struct pppoatm_vcc *pvcc = from_tasklet(pvcc, t, wakeup_tasklet);
 
-	ppp_output_wakeup(&pvcc->chan);
+	ppp_output_wakeup(pvcc->chan);
 }
 
 static void pppoatm_release_cb(struct atm_vcc *atmvcc)
@@ -172,7 +167,7 @@ static void pppoatm_unassign_vcc(struct atm_vcc *atmvcc)
 	atmvcc->pop = pvcc->old_pop;
 	atmvcc->release_cb = pvcc->old_release_cb;
 	tasklet_kill(&pvcc->wakeup_tasklet);
-	ppp_unregister_channel(&pvcc->chan);
+	ppp_unregister_channel(pvcc->chan);
 	atmvcc->user_back = NULL;
 	kfree(pvcc);
 }
@@ -201,7 +196,7 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 		skb_pull(skb, LLC_LEN);
 		break;
 	case e_autodetect:
-		if (pvcc->chan.ppp == NULL) {	/* Not bound yet! */
+		if (!pvcc->chan) {	/* Not bound yet! */
 			kfree_skb(skb);
 			return;
 		}
@@ -215,7 +210,8 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 		    !memcmp(skb->data, &pppllc[LLC_LEN],
 		    sizeof(pppllc) - LLC_LEN)) {
 			pvcc->encaps = e_vc;
-			pvcc->chan.mtu += LLC_LEN;
+			ppp_channel_update_mtu(pvcc->chan,
+					       atmvcc->qos.txtp.max_sdu - PPP_HDRLEN);
 			break;
 		}
 		pr_debug("Couldn't autodetect yet (skb: %6ph)\n", skb->data);
@@ -223,12 +219,12 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	case e_vc:
 		break;
 	}
-	ppp_input(&pvcc->chan, skb);
+	ppp_input(pvcc->chan, skb);
 	return;
 
 error:
 	kfree_skb(skb);
-	ppp_input_error(&pvcc->chan);
+	ppp_input_error(pvcc->chan);
 }
 
 static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
@@ -286,9 +282,9 @@ static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
  * as success, just to be clear what we're really doing.
  */
 #define DROP_PACKET 1
-static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
+static int pppoatm_send(void *private, struct sk_buff *skb)
 {
-	struct pppoatm_vcc *pvcc = chan_to_pvcc(chan);
+	struct pppoatm_vcc *pvcc = private;
 	struct atm_vcc *vcc;
 	int ret;
 
@@ -367,16 +363,15 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
 }
 
 /* This handles ioctls sent to the /dev/ppp interface */
-static int pppoatm_devppp_ioctl(struct ppp_channel *chan, unsigned int cmd,
-	unsigned long arg)
+static int pppoatm_devppp_ioctl(void *private, unsigned int cmd,
+				unsigned long arg)
 {
+	struct pppoatm_vcc *pvcc = private;
 	switch (cmd) {
 	case PPPIOCGFLAGS:
-		return put_user(chan_to_pvcc(chan)->flags, (int __user *) arg)
-		    ? -EFAULT : 0;
+		return put_user(pvcc->flags, (int __user *)arg) ? -EFAULT : 0;
 	case PPPIOCSFLAGS:
-		return get_user(chan_to_pvcc(chan)->flags, (int __user *) arg)
-		    ? -EFAULT : 0;
+		return get_user(pvcc->flags, (int __user *)arg) ? -EFAULT : 0;
 	}
 	return -ENOTTY;
 }
@@ -388,9 +383,10 @@ static const struct ppp_channel_ops pppoatm_ops = {
 
 static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
 {
+	struct ppp_channel_conf conf = {};
 	struct atm_backend_ppp be;
 	struct pppoatm_vcc *pvcc;
-	int err;
+	struct ppp_channel *chan;
 
 	if (copy_from_user(&be, arg, sizeof be))
 		return -EFAULT;
@@ -409,16 +405,19 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
 	pvcc->old_owner = atmvcc->owner;
 	pvcc->old_release_cb = atmvcc->release_cb;
 	pvcc->encaps = (enum pppoatm_encaps) be.encaps;
-	pvcc->chan.private = pvcc;
-	pvcc->chan.ops = &pppoatm_ops;
-	pvcc->chan.mtu = atmvcc->qos.txtp.max_sdu - PPP_HDRLEN -
+	conf.private = pvcc;
+	conf.ops = &pppoatm_ops;
+#ifdef CONFIG_PPP_MULTILINK
+	conf.mtu = atmvcc->qos.txtp.max_sdu - PPP_HDRLEN -
 	    (be.encaps == e_vc ? 0 : LLC_LEN);
+#endif
 	tasklet_setup(&pvcc->wakeup_tasklet, pppoatm_wakeup_sender);
-	err = ppp_register_channel(&pvcc->chan);
-	if (err != 0) {
+	chan = ppp_register_channel(&conf);
+	if (!chan) {
 		kfree(pvcc);
-		return err;
+		return -ENOMEM;
 	}
+	pvcc->chan = chan;
 	atmvcc->user_back = pvcc;
 	atmvcc->push = pppoatm_push;
 	atmvcc->pop = pppoatm_pop;
@@ -458,11 +457,11 @@ static int pppoatm_ioctl(struct socket *sock, unsigned int cmd,
 		return pppoatm_assign_vcc(atmvcc, argp);
 		}
 	case PPPIOCGCHAN:
-		return put_user(ppp_channel_index(&atmvcc_to_pvcc(atmvcc)->
-		    chan), (int __user *) argp) ? -EFAULT : 0;
+		return put_user(ppp_channel_index(atmvcc_to_pvcc(atmvcc)->chan),
+		    (int __user *)argp) ? -EFAULT : 0;
 	case PPPIOCGUNIT:
-		return put_user(ppp_unit_number(&atmvcc_to_pvcc(atmvcc)->
-		    chan), (int __user *) argp) ? -EFAULT : 0;
+		return put_user(ppp_unit_number(atmvcc_to_pvcc(atmvcc)->chan),
+		    (int __user *)argp) ? -EFAULT : 0;
 	}
 	return -ENOIOCTLCMD;
 }
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 99d6582f41de..6c7b08f4e49a 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -121,7 +121,7 @@ struct pppol2tp_session {
 	struct sock		*__sk;		/* Copy of .sk, for cleanup */
 };
 
-static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
+static int pppol2tp_xmit(void *private, struct sk_buff *skb);
 
 static const struct ppp_channel_ops pppol2tp_chan_ops = {
 	.start_xmit =  pppol2tp_xmit,
@@ -221,7 +221,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
 		struct pppox_sock *po;
 
 		po = pppox_sk(sk);
-		ppp_input(&po->chan, skb);
+		ppp_input(po->chan, skb);
 	} else {
 		if (sock_queue_rcv_skb(sk, skb) < 0) {
 			atomic_long_inc(&session->stats.rx_errors);
@@ -326,9 +326,9 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
  * the skb it supplied, not our cloned skb. So we take care to always
  * leave the original skb unfreed if we return an error.
  */
-static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+static int pppol2tp_xmit(void *private, struct sk_buff *skb)
 {
-	struct sock *sk = (struct sock *)chan->private;
+	struct sock *sk = private;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
 	int uhlen, headroom;
@@ -504,7 +504,7 @@ static void pppol2tp_show(struct seq_file *m, void *arg)
 	if (sk) {
 		struct pppox_sock *po = pppox_sk(sk);
 
-		seq_printf(m, "   interface %s\n", ppp_dev_name(&po->chan));
+		seq_printf(m, "   interface %s\n", ppp_dev_name(po->chan));
 	}
 	rcu_read_unlock();
 }
@@ -612,7 +612,7 @@ static int pppol2tp_sockaddr_get_info(const void *sa, int sa_len,
  * numbers and no IP option. Not quite accurate, but the result is mostly
  * unused anyway.
  */
-static int pppol2tp_tunnel_mtu(const struct l2tp_tunnel *tunnel)
+static int __maybe_unused pppol2tp_tunnel_mtu(const struct l2tp_tunnel *tunnel)
 {
 	int mtu;
 
@@ -694,6 +694,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr_unsized *userva
 	struct l2tp_tunnel *tunnel;
 	struct pppol2tp_session *ps;
 	struct l2tp_session_cfg cfg = { 0, };
+	struct ppp_channel_conf conf = {};
+	struct ppp_channel *chan;
 	bool drop_refcnt = false;
 	bool new_session = false;
 	bool new_tunnel = false;
@@ -792,18 +794,22 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr_unsized *userva
 	 * the net device's hard_header_len at registration, which must be
 	 * sufficient regardless of whether sequence numbers are enabled later.
 	 */
-	po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_SEQ;
+	conf.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_SEQ;
 
-	po->chan.private = sk;
-	po->chan.ops	 = &pppol2tp_chan_ops;
-	po->chan.mtu	 = pppol2tp_tunnel_mtu(tunnel);
-	po->chan.direct_xmit	= true;
+	conf.private = sk;
+	conf.ops	 = &pppol2tp_chan_ops;
+#ifdef CONFIG_PPP_MULTILINK
+	conf.mtu	 = pppol2tp_tunnel_mtu(tunnel);
+#endif
+	conf.direct_xmit	= true;
 
-	error = ppp_register_net_channel(sock_net(sk), &po->chan);
-	if (error) {
+	chan = ppp_register_net_channel(sock_net(sk), &conf);
+	if (!chan) {
+		error = -ENOMEM;
 		mutex_unlock(&ps->sk_lock);
 		goto end;
 	}
+	po->chan = chan;
 
 out_no_ppp:
 	/* This is how we get the session context from the socket. */
@@ -1550,7 +1556,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 	if (sk) {
 		struct pppox_sock *po = pppox_sk(sk);
 
-		seq_printf(m, "   interface %s\n", ppp_dev_name(&po->chan));
+		seq_printf(m, "   interface %s\n", ppp_dev_name(po->chan));
 	}
 	rcu_read_unlock();
 }
-- 
2.43.0


^ permalink raw reply related

* [RFC net-next 3/3] docs: update ppp_generic.rst for API changes
From: Qingfang Deng @ 2026-04-16  8:26 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Jonathan Corbet, Shuah Khan, netdev, linux-doc,
	linux-kernel
  Cc: Paul Mackerras, linux-ppp, Jaco Kroon, James Carlson,
	Qingfang Deng
In-Reply-To: <20260416082656.86963-1-qingfang.deng@linux.dev>

Document the new ppp_channel_conf struct and ppp_channel lifecycle
management changes.

Assisted-by: Gemini:gemini-3-flash
Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
---
 Documentation/networking/ppp_generic.rst | 33 ++++++++++--------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/Documentation/networking/ppp_generic.rst b/Documentation/networking/ppp_generic.rst
index 5a10abce5964..8d63f997fb3f 100644
--- a/Documentation/networking/ppp_generic.rst
+++ b/Documentation/networking/ppp_generic.rst
@@ -124,18 +124,19 @@ presented to the start_xmit() function contain only the 2-byte
 protocol number and the data, and the skbuffs presented to ppp_input()
 must be in the same format.
 
-The channel must provide an instance of a ppp_channel struct to
-represent the channel.  The channel is free to use the ``private`` field
-however it wishes.  The channel should initialize the ``mtu`` and
-``hdrlen`` fields before calling ppp_register_channel() and not change
-them until after ppp_unregister_channel() returns.  The ``mtu`` field
-represents the maximum size of the data part of the PPP frames, that
-is, it does not include the 2-byte protocol number.
+The channel must provide an instance of a ppp_channel_conf struct to
+describe the channel during registration.  The generic layer will
+allocate a ppp_channel struct and return a pointer to it.  The
+ppp_channel struct is opaque to the channel driver.  The ``mtu`` field
+(if multilink is enabled) represents the maximum size of the data part
+of the PPP frames, that is, it does not include the 2-byte protocol
+number.  ppp_channel_update_mtu() can be called by the channel driver
+to update the ``mtu`` field once LCP MRU negotiation is complete.
 
 If the channel needs some headroom in the skbuffs presented to it for
 transmission (i.e., some space free in the skbuff data area before the
 start of the PPP frame), it should set the ``hdrlen`` field of the
-ppp_channel struct to the amount of headroom required.  The generic
+ppp_channel_conf struct to the amount of headroom required.  The generic
 PPP layer will attempt to provide that much headroom but the channel
 should still check if there is sufficient headroom and copy the skbuff
 if there isn't.
@@ -199,20 +200,12 @@ The PPP generic layer has been designed to be SMP-safe.  Locks are
 used around accesses to the internal data structures where necessary
 to ensure their integrity.  As part of this, the generic layer
 requires that the channels adhere to certain requirements and in turn
-provides certain guarantees to the channels.  Essentially the channels
-are required to provide the appropriate locking on the ppp_channel
-structures that form the basis of the communication between the
-channel and the generic layer.  This is because the channel provides
-the storage for the ppp_channel structure, and so the channel is
-required to provide the guarantee that this storage exists and is
-valid at the appropriate times.
+provides certain guarantees to the channels.  The generic layer manages
+the ppp_channel object, ensuring it exists and is valid while the
+channel is registered.
 
 The generic layer requires these guarantees from the channel:
 
-* The ppp_channel object must exist from the time that
-  ppp_register_channel() is called until after the call to
-  ppp_unregister_channel() returns.
-
 * No thread may be in a call to any of ppp_input(), ppp_input_error(),
   ppp_output_wakeup(), ppp_channel_index() or ppp_unit_number() for a
   channel at the time that ppp_unregister_channel() is called for that
@@ -453,4 +446,4 @@ an interface unit are:
   fragments is disabled.  This ioctl is only available if the
   CONFIG_PPP_MULTILINK option is selected.
 
-Last modified: 7-feb-2002
+Last modified: 16-apr-2026
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH net-next v3] net: mctp: don't require received header reserved bits to be zero
From: Paolo Abeni @ 2026-04-16  8:30 UTC (permalink / raw)
  To: wit_yuan, jk; +Cc: yuanzm2, matt, davem, edumazet, kuba, netdev, linux-kernel
In-Reply-To: <20260413080333.73086-1-yuanzhaoming901030@126.com>

On 4/13/26 10:03 AM, wit_yuan wrote:
> From: Yuan Zhaoming <yuanzm2@lenovo.com>
> 
> From the MCTP Base specification (DSP0236 v1.2.1), the first byte of
> the MCTP header contains a 4 bit reserved field, and 4 bit version.
> 
> On our current receive path, we require those 4 reserved bits to be
> zero, but the 9500-8i card is non-conformant, and may set these
> reserved bits.
> 
> DSP0236 states that the reserved bits must be written as zero, and
> ignored when read. While the device might not conform to the former,
> we should accept these messages to conform to the latter.
> 
> Relax our check on the MCTP version byte to allow non-zero bits in the
> reserved field.
> 
> Signed-off-by: Yuan Zhaoming <yuanzm2@lenovo.com>

The net-next tree is currently closed for the merge window, but IMHO
this change could be considered a fix. Please repost for 'net' and add a
suitable fixes tag.

> ---
> v2: https://lore.kernel.org/netdev/20260410144339.0d1b289a@kernel.org/T/#t
> v1: https://lore.kernel.org/netdev/ff147a3f0d27ef2aa6026cc86f9113d56a8c61ac.camel@codeconstruct.com.au/T/#t
> ---
>  include/net/mctp.h | 3 +++
>  net/mctp/route.c   | 8 ++++++--
>  2 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/include/net/mctp.h b/include/net/mctp.h
> index e1e0a69..d8bf907 100644
> --- a/include/net/mctp.h
> +++ b/include/net/mctp.h
> @@ -26,6 +26,9 @@ struct mctp_hdr {
>  #define MCTP_VER_MIN	1
>  #define MCTP_VER_MAX	1
>  
> +/* Definitions for ver field */
> +#define MCTP_HDR_VER_MASK	GENMASK(3, 0)
> +
>  /* Definitions for flags_seq_tag field */
>  #define MCTP_HDR_FLAG_SOM	BIT(7)
>  #define MCTP_HDR_FLAG_EOM	BIT(6)
> diff --git a/net/mctp/route.c b/net/mctp/route.c
> index e69c6f7..62517c9 100644
> --- a/net/mctp/route.c
> +++ b/net/mctp/route.c
> @@ -439,6 +439,7 @@ static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb)
>  	struct mctp_hdr *mh;
>  	unsigned int netid;
>  	unsigned long f;
> +	u8 ver;
>  	u8 tag, flags;
>  	int rc;

Please respect the reverse christmas tree order above.

/P


^ permalink raw reply

* [PATCH bpf-next v4 4/6] bpf: allow new DECAP flags and add guard rails
From: Nick Hudson @ 2026-04-16  7:55 UTC (permalink / raw)
  To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
  Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman,
	Kumar Kartikeya Dwivedi, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, linux-kernel
In-Reply-To: <20260416075514.927101-1-nhudson@akamai.com>

Add checks to require shrink-only decap, reject conflicting decap flag
combinations, and verify removed length is sufficient for claimed header
decapsulation.

Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
 net/core/filter.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 4e860da4381d..7f8d43420afb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -56,6 +56,7 @@
 #include <net/sock_reuseport.h>
 #include <net/busy_poll.h>
 #include <net/tcp.h>
+#include <net/gre.h>
 #include <net/xfrm.h>
 #include <net/udp.h>
 #include <linux/bpf_trace.h>
@@ -3490,6 +3491,12 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 #define BPF_F_ADJ_ROOM_DECAP_L3_MASK	(BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
 					 BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
 
+#define BPF_F_ADJ_ROOM_DECAP_L4_MASK	(BPF_F_ADJ_ROOM_DECAP_L4_UDP | \
+					 BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+
+#define BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK	(BPF_F_ADJ_ROOM_DECAP_IPXIP4 | \
+					 BPF_F_ADJ_ROOM_DECAP_IPXIP6)
+
 #define BPF_F_ADJ_ROOM_ENCAP_MASK	(BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
 					 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
 					 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
@@ -3497,7 +3504,9 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 					 BPF_F_ADJ_ROOM_ENCAP_L2( \
 					  BPF_ADJ_ROOM_ENCAP_L2_MASK))
 
-#define BPF_F_ADJ_ROOM_DECAP_MASK	(BPF_F_ADJ_ROOM_DECAP_L3_MASK)
+#define BPF_F_ADJ_ROOM_DECAP_MASK	(BPF_F_ADJ_ROOM_DECAP_L3_MASK | \
+					 BPF_F_ADJ_ROOM_DECAP_L4_MASK | \
+					 BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)
 
 #define BPF_F_ADJ_ROOM_MASK		(BPF_F_ADJ_ROOM_FIXED_GSO | \
 					 BPF_F_ADJ_ROOM_ENCAP_MASK | \
@@ -3740,6 +3749,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	}
 
 	if (flags & BPF_F_ADJ_ROOM_DECAP_MASK) {
+		u32 len_decap_min = 0;
+
 		if (!shrink)
 			return -EINVAL;
 
@@ -3748,6 +3759,37 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 		    BPF_F_ADJ_ROOM_DECAP_L3_MASK)
 			return -EINVAL;
 
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK) ==
+		    BPF_F_ADJ_ROOM_DECAP_L4_MASK)
+			return -EINVAL;
+
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK) ==
+		    BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)
+			return -EINVAL;
+
+		/* Reject mutually exclusive decap tunnel type flags. */
+		if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK) &&
+		    (flags & BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK))
+			return -EINVAL;
+
+		if (flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK)
+			len_decap_min += bpf_skb_net_base_len(skb);
+
+		if (flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP)
+			len_decap_min += sizeof(struct udphdr);
+
+		if (flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+			len_decap_min += sizeof(struct gre_base_hdr);
+
+		if (flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4)
+			len_decap_min += sizeof(struct iphdr);
+
+		if (flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6)
+			len_decap_min += sizeof(struct ipv6hdr);
+
+		if (len_diff_abs < len_decap_min)
+			return -EINVAL;
+
 		if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
 			len_min = sizeof(struct iphdr);
 
-- 
2.34.1


^ permalink raw reply related

* Re: [PATCH bpf-next v4 5/6] bpf: clear decap tunnel GSO state in skb_adjust_room
From: bot+bpf-ci @ 2026-04-16  8:34 UTC (permalink / raw)
  To: nhudson, bpf, netdev, willemb, martin.lau
  Cc: nhudson, mtottenh, aglasgal, daniel, ast, andrii, eddyz87, memxor,
	davem, edumazet, kuba, pabeni, linux-kernel, ast, andrii, daniel,
	martin.lau, eddyz87, yonghong.song, clm, ihor.solodrai
In-Reply-To: <20260416075514.927101-6-nhudson@akamai.com>

[-- Attachment #1: Type: text/plain, Size: 2910 bytes --]

> diff --git a/net/core/filter.c b/net/core/filter.c
> index 1dac64b50de4..4e77e9c46f1a 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3657,6 +3657,44 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>  		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
>  			skb_increase_gso_size(shinfo, len_diff);
>
> +		/* Selective GSO flag clearing based on decap type.
> +		 * Only clear the flags for the tunnel layer being removed.
> +		 */
> +		if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) &&

The symbols BPF_F_ADJ_ROOM_DECAP_L4_UDP, BPF_F_ADJ_ROOM_DECAP_L4_GRE,
BPF_F_ADJ_ROOM_DECAP_IPXIP4, BPF_F_ADJ_ROOM_DECAP_IPXIP6,
BPF_F_ADJ_ROOM_DECAP_L4_MASK, and BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK
are used throughout this hunk but are not defined anywhere in the tree.

The series cover letter lists patch 3/6 "bpf: add
BPF_F_ADJ_ROOM_DECAP_* flags for tunnel decapsulation" which would
add these to include/uapi/linux/bpf.h, but that patch is not present.
Without it, this commit will not compile.

Additionally, even if these symbols were defined, BPF_F_ADJ_ROOM_MASK
and BPF_F_ADJ_ROOM_DECAP_MASK do not include the new flag bits.  Both
bpf_skb_adjust_room():

    if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
        return -EINVAL;

and bpf_skb_net_shrink():

    if (unlikely(flags & ~(BPF_F_ADJ_ROOM_DECAP_MASK |
                           BPF_F_ADJ_ROOM_FIXED_GSO |
                           BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
        return -EINVAL;

would reject any flags value containing these new bits, making the
new code unreachable.  Patch 4/6 "bpf: allow new DECAP flags and add
guard rails" from the series would presumably update these masks, but
it is also not present.

Are patches 3/6 and 4/6 of the series missing from this tree?

> +		    (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
> +					 SKB_GSO_UDP_TUNNEL_CSUM)))
> +			shinfo->gso_type &= ~(SKB_GSO_UDP_TUNNEL |
> +					      SKB_GSO_UDP_TUNNEL_CSUM);

[ ... ]

> +		/* Clear encapsulation flag only when no tunnel GSO flags remain */
> +		if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK |
> +			     BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) {

[ ... ]

> +			if (flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) {
> +				skb->encap_hdr_csum = 0;
> +				skb->remcsum_offload = 0;
> +			}
> +		}

This isn't a bug, but the BPF subsystem convention is that multi-line
comments should have the opening /* on its own line:

> +		/* Selective GSO flag clearing based on decap type.
> +		 * Only clear the flags for the tunnel layer being removed.
> +		 */

Should be:

    /*
     * Selective GSO flag clearing based on decap type.
     * Only clear the flags for the tunnel layer being removed.
     */

---
An AI reviewed your patch. Please fix the bug, or reply by email explaining why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/24499314045

^ permalink raw reply

* [PATCH bpf-next v4 3/6] bpf: add BPF_F_ADJ_ROOM_DECAP_* flags for tunnel decapsulation
From: Nick Hudson @ 2026-04-16  7:55 UTC (permalink / raw)
  To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
  Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman,
	Kumar Kartikeya Dwivedi, linux-kernel
In-Reply-To: <20260416075514.927101-1-nhudson@akamai.com>

Add new bpf_skb_adjust_room() decapsulation flags:

- BPF_F_ADJ_ROOM_DECAP_L4_GRE
- BPF_F_ADJ_ROOM_DECAP_L4_UDP
- BPF_F_ADJ_ROOM_DECAP_IPXIP4
- BPF_F_ADJ_ROOM_DECAP_IPXIP6

These flags let BPF programs describe which tunnel layer is being
removed, so later changes can update tunnel-related GSO state
accordingly during decapsulation.

This patch only introduces the UAPI flag definitions and helper
documentation.

Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
---
 include/uapi/linux/bpf.h       | 34 ++++++++++++++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h | 34 ++++++++++++++++++++++++++++++++--
 2 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c021ed8d7b44..4a53e731c554 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3010,8 +3010,34 @@ union bpf_attr {
  *
  *		* **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
  *		  **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
- *		  Indicate the new IP header version after decapsulating the outer
- *		  IP header. Used when the inner and outer IP versions are different.
+ *		  Indicate the new IP header version after decapsulating the
+ *		  outer IP header. Used when the inner and outer IP versions
+ *		  are different. These flags only trigger a protocol change
+ *		  without clearing any tunnel-specific GSO flags.
+ *
+ *		* **BPF_F_ADJ_ROOM_DECAP_L4_GRE**:
+ *		  Clear GRE tunnel GSO flags (SKB_GSO_GRE and SKB_GSO_GRE_CSUM)
+ *		  when decapsulating a GRE tunnel.
+ *
+ *		* **BPF_F_ADJ_ROOM_DECAP_L4_UDP**:
+ *		  Clear UDP tunnel GSO flags (SKB_GSO_UDP_TUNNEL and
+ *		  SKB_GSO_UDP_TUNNEL_CSUM) when decapsulating a UDP tunnel.
+ *
+ *		* **BPF_F_ADJ_ROOM_DECAP_IPXIP4**:
+ *		  Clear IPIP/SIT tunnel GSO flag (SKB_GSO_IPXIP4) when decapsulating
+ *		  a tunnel with an outer IPv4 header (IPv4-in-IPv4 or IPv6-in-IPv4).
+ *
+ *		* **BPF_F_ADJ_ROOM_DECAP_IPXIP6**:
+ *		  Clear IPv6 encapsulation tunnel GSO flag (SKB_GSO_IPXIP6) when
+ *		  decapsulating a tunnel with an outer IPv6 header (IPv6-in-IPv6
+ *		  or IPv4-in-IPv6).
+ *
+ *		When using the decapsulation flags above, the skb->encapsulation
+ *		flag is automatically cleared if all tunnel-specific GSO flags
+ *		(SKB_GSO_UDP_TUNNEL, SKB_GSO_UDP_TUNNEL_CSUM, SKB_GSO_GRE,
+ *		SKB_GSO_GRE_CSUM, SKB_GSO_IPXIP4, SKB_GSO_IPXIP6) have been
+ *		removed from the packet. This handles cases where all tunnel
+ *		layers have been decapsulated.
  *
  * 		A call to this helper is susceptible to change the underlying
  * 		packet buffer. Therefore, at load time, all checks on pointers
@@ -6221,6 +6247,10 @@ enum bpf_adj_room_flags {
 	BPF_F_ADJ_ROOM_ENCAP_L2_ETH	= (1ULL << 6),
 	BPF_F_ADJ_ROOM_DECAP_L3_IPV4	= (1ULL << 7),
 	BPF_F_ADJ_ROOM_DECAP_L3_IPV6	= (1ULL << 8),
+	BPF_F_ADJ_ROOM_DECAP_L4_GRE	= (1ULL << 9),
+	BPF_F_ADJ_ROOM_DECAP_L4_UDP	= (1ULL << 10),
+	BPF_F_ADJ_ROOM_DECAP_IPXIP4	= (1ULL << 11),
+	BPF_F_ADJ_ROOM_DECAP_IPXIP6	= (1ULL << 12),
 };
 
 enum {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ca35ed622ed5..f4c2fbd8fe68 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3010,8 +3010,34 @@ union bpf_attr {
  *
  *		* **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
  *		  **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
- *		  Indicate the new IP header version after decapsulating the outer
- *		  IP header. Used when the inner and outer IP versions are different.
+ *		  Indicate the new IP header version after decapsulating the
+ *		  outer IP header. Used when the inner and outer IP versions
+ *		  are different. These flags only trigger a protocol change
+ *		  without clearing any tunnel-specific GSO flags.
+ *
+ *		* **BPF_F_ADJ_ROOM_DECAP_L4_GRE**:
+ *		  Clear GRE tunnel GSO flags (SKB_GSO_GRE and SKB_GSO_GRE_CSUM)
+ *		  when decapsulating a GRE tunnel.
+ *
+ *		* **BPF_F_ADJ_ROOM_DECAP_L4_UDP**:
+ *		  Clear UDP tunnel GSO flags (SKB_GSO_UDP_TUNNEL and
+ *		  SKB_GSO_UDP_TUNNEL_CSUM) when decapsulating a UDP tunnel.
+ *
+ *		* **BPF_F_ADJ_ROOM_DECAP_IPXIP4**:
+ *		  Clear IPIP/SIT tunnel GSO flag (SKB_GSO_IPXIP4) when decapsulating
+ *		  a tunnel with an outer IPv4 header (IPv4-in-IPv4 or IPv6-in-IPv4).
+ *
+ *		* **BPF_F_ADJ_ROOM_DECAP_IPXIP6**:
+ *		  Clear IPv6 encapsulation tunnel GSO flag (SKB_GSO_IPXIP6) when
+ *		  decapsulating a tunnel with an outer IPv6 header (IPv6-in-IPv6
+ *		  or IPv4-in-IPv6).
+ *
+ *		When using the decapsulation flags above, the skb->encapsulation
+ *		flag is automatically cleared if all tunnel-specific GSO flags
+ *		(SKB_GSO_UDP_TUNNEL, SKB_GSO_UDP_TUNNEL_CSUM, SKB_GSO_GRE,
+ *		SKB_GSO_GRE_CSUM, SKB_GSO_IPXIP4, SKB_GSO_IPXIP6) have been
+ *		removed from the packet. This handles cases where all tunnel
+ *		layers have been decapsulated.
  *
  * 		A call to this helper is susceptible to change the underlying
  * 		packet buffer. Therefore, at load time, all checks on pointers
@@ -6221,6 +6247,10 @@ enum bpf_adj_room_flags {
 	BPF_F_ADJ_ROOM_ENCAP_L2_ETH	= (1ULL << 6),
 	BPF_F_ADJ_ROOM_DECAP_L3_IPV4	= (1ULL << 7),
 	BPF_F_ADJ_ROOM_DECAP_L3_IPV6	= (1ULL << 8),
+	BPF_F_ADJ_ROOM_DECAP_L4_GRE	= (1ULL << 9),
+	BPF_F_ADJ_ROOM_DECAP_L4_UDP	= (1ULL << 10),
+	BPF_F_ADJ_ROOM_DECAP_IPXIP4	= (1ULL << 11),
+	BPF_F_ADJ_ROOM_DECAP_IPXIP6	= (1ULL << 12),
 };
 
 enum {
-- 
2.34.1


^ permalink raw reply related

* [PATCH net] ixgbe: only access vfinfo and mv_list under RCU lock
From: Corinna Vinschen @ 2026-04-16  8:42 UTC (permalink / raw)
  To: intel-wired-lan, netdev; +Cc: Corinna Vinschen

Commit 1e53834ce541d ("ixgbe: Add locking to prevent panic when setting
sriov_numvfs to zero") added a spinlock to the adapter info.  The reason
at the time was an observed crash when ixgbe_disable_sriov() freed the
adapter->vfinfo array while the interrupt driven function ixgbe_msg_task()
was handling VF messages.

Recent stability testing turned up another crash, which is very easily
reproducible:

  while true
  do
    for numvfs in 5 0
    do
      echo $numvfs > /sys/class/net/eth0/device/sriov_numvfs
    done
  done

This crashed almost always within the first two hundred runs with
a NULL pointer deref while running the ixgbe_service_task() workqueue:

[ 5052.036491] BUG: kernel NULL pointer dereference, address: 0000000000000258
[ 5052.043454] #PF: supervisor read access in kernel mode
[ 5052.048594] #PF: error_code(0x0000) - not-present page
[ 5052.053734] PGD 0 P4D 0
[ 5052.056272] Oops: Oops: 0000 #1 SMP NOPTI
[ 5052.060459] CPU: 2 UID: 0 PID: 132253 Comm: kworker/u96:0 Kdump: loaded Not tainted 6.12.0-180.el10.x86_64 #1 PREEMPT(voluntary)
[ 5052.072100] Hardware name: Dell Inc. PowerEdge R740/0DY2X0, BIOS 2.12.2 07/09/2021
[ 5052.079664] Workqueue: ixgbe ixgbe_service_task [ixgbe]
[ 5052.084907] RIP: 0010:ixgbe_update_stats+0x8b1/0xb40 [ixgbe]
[ 5052.090585] Code: 21 56 50 49 8b b6 18 26 00 00 4c 01 fe 48 09 46 50 42 8d 34 a5 00 83 00 00 e8 cb 7a ff ff 49 8b b6 18 26 00 00 89 c0 4c 01 fe <48> 3b 86 88 00 00 00 73 18 48 b9 00 00 00 00 01 00 00 00 48 01 4e
[ 5052.109331] RSP: 0018:ffffd5f1e8a6bd88 EFLAGS: 00010202
[ 5052.114558] RAX: 0000000000000000 RBX: ffff8f49b22b14a0 RCX: 000000000000023c
[ 5052.121689] RDX: ffffffff00000000 RSI: 00000000000001d0 RDI: ffff8f49b22b14a0
[ 5052.128823] RBP: 000000000000109c R08: 0000000000000000 R09: 0000000000000000
[ 5052.135955] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002
[ 5052.143086] R13: 0000000000008410 R14: ffff8f49b22b01a0 R15: 00000000000001d0
[ 5052.150221] FS:  0000000000000000(0000) GS:ffff8f58bfc80000(0000) knlGS:0000000000000000
[ 5052.158307] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 5052.164054] CR2: 0000000000000258 CR3: 0000000bf2624006 CR4: 00000000007726f0
[ 5052.171187] PKRU: 55555554
[ 5052.173898] Call Trace:
[ 5052.176351]  <TASK>
[ 5052.178457]  ? show_trace_log_lvl+0x1b0/0x2f0
[ 5052.182816]  ? show_trace_log_lvl+0x1b0/0x2f0
[ 5052.187177]  ? ixgbe_watchdog_subtask+0x1a1/0x230 [ixgbe]
[ 5052.192591]  ? __die_body.cold+0x8/0x12
[ 5052.196433]  ? page_fault_oops+0x148/0x160
[ 5052.200532]  ? exc_page_fault+0x7f/0x150
[ 5052.204458]  ? asm_exc_page_fault+0x26/0x30
[ 5052.208643]  ? ixgbe_update_stats+0x8b1/0xb40 [ixgbe]
[ 5052.213714]  ? ixgbe_update_stats+0x8a5/0xb40 [ixgbe]
[ 5052.218784]  ixgbe_watchdog_subtask+0x1a1/0x230 [ixgbe]
[ 5052.224026]  ixgbe_service_task+0x15a/0x3f0 [ixgbe]
[ 5052.228916]  process_one_work+0x177/0x330
[ 5052.232928]  worker_thread+0x256/0x3a0
[ 5052.236681]  ? __pfx_worker_thread+0x10/0x10
[ 5052.240952]  kthread+0xfa/0x240
[ 5052.244099]  ? __pfx_kthread+0x10/0x10
[ 5052.247852]  ret_from_fork+0x34/0x50
[ 5052.251429]  ? __pfx_kthread+0x10/0x10
[ 5052.255185]  ret_from_fork_asm+0x1a/0x30
[ 5052.259112]  </TASK>

The first simple patch, which just added spinlocking to
ixgbe_update_stats() while reading from adapter->vfinfo, did not fix the
problem; it just moved it elsewhere: I could now reproduce the same kind
of crash in ixgbe_restore_vf_multicasts().

But adding more spinlocking doesn't really cut it.  One reason is that
ixgbe_restore_vf_multicasts() is called from within ixgbe_msg_task()
with the spinlock held, as well as from other callers without locking.

Additionally, ixgbe_disable_sriov() is the only call changing
adapter->vfinfo, and it is called very seldom compared to other actions
in the driver.  Just adding more spinlocking would therefore make the
driver spin unnecessarily whenever multiple functions accessing
adapter->vfinfo run in parallel.

So this patch drops the spinlock in favor of RCU and uses it throughout
the driver.

While changing this, it seems prudent to do the same for the
adapter->mv_list array, which is allocated and freed at the same time as
adapter->vfinfo, although no crash involving it was observed.

Fixes: 1e53834ce541d ("ixgbe: Add locking to prevent panic when setting sriov_numvfs to zero")
Signed-off-by: Corinna Vinschen <vinschen@redhat.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h      |   7 +-
 .../net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c   |  36 +-
 .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c  |  44 +-
 .../net/ethernet/intel/ixgbe/ixgbe_ipsec.c    |  17 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 229 +++++---
 .../net/ethernet/intel/ixgbe/ixgbe_sriov.c    | 547 ++++++++++++------
 6 files changed, 593 insertions(+), 287 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 9b8217523fd2..8849b9f42bf6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -210,6 +210,7 @@ struct vf_stats {
 };
 
 struct vf_data_storage {
+	struct rcu_head rcu_head;
 	struct pci_dev *vfdev;
 	unsigned char vf_mac_addresses[ETH_ALEN];
 	u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES];
@@ -240,6 +241,7 @@ enum ixgbevf_xcast_modes {
 };
 
 struct vf_macvlans {
+	struct rcu_head rcu_head;
 	struct list_head l;
 	int vf;
 	bool free;
@@ -808,10 +810,10 @@ struct ixgbe_adapter {
 	/* SR-IOV */
 	DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS);
 	unsigned int num_vfs;
-	struct vf_data_storage *vfinfo;
+	struct vf_data_storage __rcu *vfinfo;
 	int vf_rate_link_speed;
 	struct vf_macvlans vf_mvs;
-	struct vf_macvlans *mv_list;
+	struct vf_macvlans __rcu *mv_list;
 
 	u32 timer_event_accumulator;
 	u32 vferr_refcount;
@@ -844,7 +846,6 @@ struct ixgbe_adapter {
 #ifdef CONFIG_IXGBE_IPSEC
 	struct ixgbe_ipsec *ipsec;
 #endif /* CONFIG_IXGBE_IPSEC */
-	spinlock_t vfs_lock;
 };
 
 struct ixgbe_netdevice_priv {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
index 382d097e4b11..9a84cfc09120 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
@@ -640,17 +640,21 @@ static int ixgbe_dcbnl_ieee_setapp(struct net_device *dev,
 	/* VF devices should use default UP when available */
 	if (app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
 	    app->protocol == 0) {
+		struct vf_data_storage *vfinfo;
 		int vf;
 
 		adapter->default_up = app->priority;
 
-		for (vf = 0; vf < adapter->num_vfs; vf++) {
-			struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
-
-			if (!vfinfo->pf_qos)
-				ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
-						app->priority, vf);
-		}
+		rcu_read_lock();
+		vfinfo = rcu_dereference(adapter->vfinfo);
+		if (vfinfo)
+			for (vf = 0; vf < adapter->num_vfs; vf++) {
+				if (!vfinfo[vf].pf_qos)
+					ixgbe_set_vmvir(adapter,
+							vfinfo[vf].pf_vlan,
+							app->priority, vf);
+			}
+		rcu_read_unlock();
 	}
 
 	return 0;
@@ -683,19 +687,23 @@ static int ixgbe_dcbnl_ieee_delapp(struct net_device *dev,
 	/* IF default priority is being removed clear VF default UP */
 	if (app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
 	    app->protocol == 0 && adapter->default_up == app->priority) {
+		struct vf_data_storage *vfinfo;
 		int vf;
 		long unsigned int app_mask = dcb_ieee_getapp_mask(dev, app);
 		int qos = app_mask ? find_first_bit(&app_mask, 8) : 0;
 
 		adapter->default_up = qos;
 
-		for (vf = 0; vf < adapter->num_vfs; vf++) {
-			struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
-
-			if (!vfinfo->pf_qos)
-				ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
-						qos, vf);
-		}
+		rcu_read_lock();
+		vfinfo = rcu_dereference(adapter->vfinfo);
+		if (vfinfo)
+			for (vf = 0; vf < adapter->num_vfs; vf++) {
+				if (!vfinfo[vf].pf_qos)
+					ixgbe_set_vmvir(adapter,
+							vfinfo[vf].pf_vlan,
+							qos, vf);
+			}
+		rcu_read_unlock();
 	}
 
 	return err;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index ba049b3a9609..b77317476af4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2265,21 +2265,28 @@ static void ixgbe_diag_test(struct net_device *netdev,
 		struct ixgbe_hw *hw = &adapter->hw;
 
 		if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+			struct vf_data_storage *vfinfo;
 			int i;
-			for (i = 0; i < adapter->num_vfs; i++) {
-				if (adapter->vfinfo[i].clear_to_send) {
-					netdev_warn(netdev, "offline diagnostic is not supported when VFs are present\n");
-					data[0] = 1;
-					data[1] = 1;
-					data[2] = 1;
-					data[3] = 1;
-					data[4] = 1;
-					eth_test->flags |= ETH_TEST_FL_FAILED;
-					clear_bit(__IXGBE_TESTING,
-						  &adapter->state);
-					return;
+
+			rcu_read_lock();
+			vfinfo = rcu_dereference(adapter->vfinfo);
+			if (vfinfo)
+				for (i = 0; i < adapter->num_vfs; i++) {
+					if (vfinfo[i].clear_to_send) {
+						netdev_warn(netdev, "offline diagnostic is not supported when VFs are present\n");
+						data[0] = 1;
+						data[1] = 1;
+						data[2] = 1;
+						data[3] = 1;
+						data[4] = 1;
+						eth_test->flags |= ETH_TEST_FL_FAILED;
+						clear_bit(__IXGBE_TESTING,
+							  &adapter->state);
+						rcu_read_unlock();
+						return;
+					}
 				}
-			}
+			rcu_read_unlock();
 		}
 
 		/* Offline tests */
@@ -3700,9 +3707,14 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags)
 	if (priv_flags & IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF) {
 		if (adapter->hw.mac.type == ixgbe_mac_82599EB) {
 			/* Reset primary abort counter */
-			for (i = 0; i < adapter->num_vfs; i++)
-				adapter->vfinfo[i].primary_abort_count = 0;
-
+			struct vf_data_storage *vfinfo;
+
+			rcu_read_lock();
+			vfinfo = rcu_dereference(adapter->vfinfo);
+			if (vfinfo)
+				for (i = 0; i < adapter->num_vfs; i++)
+					vfinfo[i].primary_abort_count = 0;
+			rcu_read_unlock();
 			flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF;
 		} else {
 			e_info(probe,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index bd397b3d7dea..b524a3a61eb6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -874,6 +874,7 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf)
 int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 {
 	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct vf_data_storage *vfinfo;
 	struct xfrm_algo_desc *algo;
 	struct sa_mbx_msg *sam;
 	struct xfrm_state *xs;
@@ -883,7 +884,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 	int err;
 
 	sam = (struct sa_mbx_msg *)(&msgbuf[1]);
-	if (!adapter->vfinfo[vf].trusted ||
+
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
+	if (!vfinfo[vf].trusted ||
 	    !(adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED)) {
 		e_warn(drv, "VF %d attempted to add an IPsec SA\n", vf);
 		err = -EACCES;
@@ -984,11 +991,17 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 {
 	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct vf_data_storage *vfinfo;
 	struct xfrm_state *xs;
 	u32 pfsa = msgbuf[1];
 	u16 sa_idx;
 
-	if (!adapter->vfinfo[vf].trusted) {
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
+	if (!vfinfo[vf].trusted) {
 		e_err(drv, "vf %d attempted to delete an SA\n", vf);
 		return -EPERM;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2646ee6f295f..6ee8c2a140c2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1240,20 +1240,26 @@ static void ixgbe_pf_handle_tx_hang(struct ixgbe_ring *tx_ring,
 static void ixgbe_vf_handle_tx_hang(struct ixgbe_adapter *adapter, u16 vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 
 	if (adapter->hw.mac.type != ixgbe_mac_e610)
 		return;
 
-	e_warn(drv,
-	       "Malicious Driver Detection tx hang detected on PF %d VF %d MAC: %pM",
-	       hw->bus.func, vf, adapter->vfinfo[vf].vf_mac_addresses);
-
-	adapter->tx_hang_count[vf]++;
-	if (adapter->tx_hang_count[vf] == IXGBE_MAX_TX_VF_HANGS) {
-		ixgbe_set_vf_link_state(adapter, vf,
-					IFLA_VF_LINK_STATE_DISABLE);
-		adapter->tx_hang_count[vf] = 0;
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo) {
+		e_warn(drv,
+		       "Malicious Driver Detection tx hang detected on PF %d VF %d MAC: %pM",
+		       hw->bus.func, vf, vfinfo[vf].vf_mac_addresses);
+
+		adapter->tx_hang_count[vf]++;
+		if (adapter->tx_hang_count[vf] == IXGBE_MAX_TX_VF_HANGS) {
+			ixgbe_set_vf_link_state(adapter, vf,
+						IFLA_VF_LINK_STATE_DISABLE);
+			adapter->tx_hang_count[vf] = 0;
+		}
 	}
+	rcu_read_unlock();
 }
 
 static u32 ixgbe_poll_tx_icache(struct ixgbe_hw *hw, u16 queue, u16 idx)
@@ -4625,6 +4631,7 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u16 pool = adapter->num_rx_pools;
 	u32 reg_offset, vf_shift, vmolr;
+	struct vf_data_storage *vfinfo;
 	u32 gcr_ext, vmdctl;
 	int i;
 
@@ -4680,15 +4687,19 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
 
 	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
 
-	for (i = 0; i < adapter->num_vfs; i++) {
-		/* configure spoof checking */
-		ixgbe_ndo_set_vf_spoofchk(adapter->netdev, i,
-					  adapter->vfinfo[i].spoofchk_enabled);
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo)
+		for (i = 0; i < adapter->num_vfs; i++) {
+			/* configure spoof checking */
+			ixgbe_ndo_set_vf_spoofchk(adapter->netdev, i,
+						  vfinfo[i].spoofchk_enabled);
 
-		/* Enable/Disable RSS query feature  */
-		ixgbe_ndo_set_vf_rss_query_en(adapter->netdev, i,
-					  adapter->vfinfo[i].rss_query_enabled);
-	}
+			/* Enable/Disable RSS query feature  */
+			ixgbe_ndo_set_vf_rss_query_en(adapter->netdev, i,
+						  vfinfo[i].rss_query_enabled);
+		}
+	rcu_read_unlock();
 }
 
 static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
@@ -6093,35 +6104,40 @@ static void ixgbe_check_media_subtask(struct ixgbe_adapter *adapter)
 static void ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 	int i;
 
-	for (i = 0; i < adapter->num_vfs; i++) {
-		adapter->vfinfo[i].last_vfstats.gprc =
-			IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i));
-		adapter->vfinfo[i].saved_rst_vfstats.gprc +=
-			adapter->vfinfo[i].vfstats.gprc;
-		adapter->vfinfo[i].vfstats.gprc = 0;
-		adapter->vfinfo[i].last_vfstats.gptc =
-			IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i));
-		adapter->vfinfo[i].saved_rst_vfstats.gptc +=
-			adapter->vfinfo[i].vfstats.gptc;
-		adapter->vfinfo[i].vfstats.gptc = 0;
-		adapter->vfinfo[i].last_vfstats.gorc =
-			IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i));
-		adapter->vfinfo[i].saved_rst_vfstats.gorc +=
-			adapter->vfinfo[i].vfstats.gorc;
-		adapter->vfinfo[i].vfstats.gorc = 0;
-		adapter->vfinfo[i].last_vfstats.gotc =
-			IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i));
-		adapter->vfinfo[i].saved_rst_vfstats.gotc +=
-			adapter->vfinfo[i].vfstats.gotc;
-		adapter->vfinfo[i].vfstats.gotc = 0;
-		adapter->vfinfo[i].last_vfstats.mprc =
-			IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i));
-		adapter->vfinfo[i].saved_rst_vfstats.mprc +=
-			adapter->vfinfo[i].vfstats.mprc;
-		adapter->vfinfo[i].vfstats.mprc = 0;
-	}
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo)
+		for (i = 0; i < adapter->num_vfs; i++) {
+			vfinfo[i].last_vfstats.gprc =
+				IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i));
+			vfinfo[i].saved_rst_vfstats.gprc +=
+				vfinfo[i].vfstats.gprc;
+			vfinfo[i].vfstats.gprc = 0;
+			vfinfo[i].last_vfstats.gptc =
+				IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i));
+			vfinfo[i].saved_rst_vfstats.gptc +=
+				vfinfo[i].vfstats.gptc;
+			vfinfo[i].vfstats.gptc = 0;
+			vfinfo[i].last_vfstats.gorc =
+				IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i));
+			vfinfo[i].saved_rst_vfstats.gorc +=
+				vfinfo[i].vfstats.gorc;
+			vfinfo[i].vfstats.gorc = 0;
+			vfinfo[i].last_vfstats.gotc =
+				IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i));
+			vfinfo[i].saved_rst_vfstats.gotc +=
+				vfinfo[i].vfstats.gotc;
+			vfinfo[i].vfstats.gotc = 0;
+			vfinfo[i].last_vfstats.mprc =
+				IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i));
+			vfinfo[i].saved_rst_vfstats.mprc +=
+				vfinfo[i].vfstats.mprc;
+			vfinfo[i].vfstats.mprc = 0;
+		}
+	rcu_read_unlock();
 }
 
 static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter)
@@ -6729,15 +6745,22 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 	timer_delete_sync(&adapter->service_timer);
 
 	if (adapter->num_vfs) {
+		struct vf_data_storage *vfinfo;
+
 		/* Clear EITR Select mapping */
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, 0);
 
+		rcu_read_lock();
+		vfinfo = rcu_dereference(adapter->vfinfo);
 		/* Mark all the VFs as inactive */
-		for (i = 0 ; i < adapter->num_vfs; i++)
-			adapter->vfinfo[i].clear_to_send = false;
+		if (vfinfo) {
+			for (i = 0 ; i < adapter->num_vfs; i++)
+				vfinfo[i].clear_to_send = false;
 
-		/* update setting rx tx for all active vfs */
-		ixgbe_set_all_vfs(adapter);
+			/* update setting rx tx for all active vfs */
+			ixgbe_set_all_vfs(adapter);
+		}
+		rcu_read_unlock();
 	}
 
 	/* disable transmits in the hardware now that interrupts are off */
@@ -7001,9 +7024,6 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
 	/* n-tuple support exists, always init our spinlock */
 	spin_lock_init(&adapter->fdir_perfect_lock);
 
-	/* init spinlock to avoid concurrency of VF resources */
-	spin_lock_init(&adapter->vfs_lock);
-
 #ifdef CONFIG_IXGBE_DCB
 	ixgbe_init_dcb(adapter);
 #endif
@@ -7905,25 +7925,31 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 	 * crazy values.
 	 */
 	if (!test_bit(__IXGBE_RESETTING, &adapter->state)) {
-		for (i = 0; i < adapter->num_vfs; i++) {
-			UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i),
-						adapter->vfinfo[i].last_vfstats.gprc,
-						adapter->vfinfo[i].vfstats.gprc);
-			UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i),
-						adapter->vfinfo[i].last_vfstats.gptc,
-						adapter->vfinfo[i].vfstats.gptc);
-			UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i),
-						IXGBE_PVFGORC_MSB(i),
-						adapter->vfinfo[i].last_vfstats.gorc,
-						adapter->vfinfo[i].vfstats.gorc);
-			UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i),
-						IXGBE_PVFGOTC_MSB(i),
-						adapter->vfinfo[i].last_vfstats.gotc,
-						adapter->vfinfo[i].vfstats.gotc);
-			UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i),
-						adapter->vfinfo[i].last_vfstats.mprc,
-						adapter->vfinfo[i].vfstats.mprc);
-		}
+		struct vf_data_storage *vfinfo;
+
+		rcu_read_lock();
+		vfinfo = rcu_dereference(adapter->vfinfo);
+		if (vfinfo)
+			for (i = 0; i < adapter->num_vfs; i++) {
+				UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i),
+							vfinfo[i].last_vfstats.gprc,
+							vfinfo[i].vfstats.gprc);
+				UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i),
+							vfinfo[i].last_vfstats.gptc,
+							vfinfo[i].vfstats.gptc);
+				UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i),
+							IXGBE_PVFGORC_MSB(i),
+							vfinfo[i].last_vfstats.gorc,
+							vfinfo[i].vfstats.gorc);
+				UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i),
+							IXGBE_PVFGOTC_MSB(i),
+							vfinfo[i].last_vfstats.gotc,
+							vfinfo[i].vfstats.gotc);
+				UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i),
+							vfinfo[i].last_vfstats.mprc,
+							vfinfo[i].vfstats.mprc);
+			}
+		rcu_read_unlock();
 	}
 }
 
@@ -8267,22 +8293,27 @@ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter)
 static void ixgbe_bad_vf_abort(struct ixgbe_adapter *adapter, u32 vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 
-	if (adapter->hw.mac.type == ixgbe_mac_82599EB &&
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo &&
+	    adapter->hw.mac.type == ixgbe_mac_82599EB &&
 	    adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF) {
-		adapter->vfinfo[vf].primary_abort_count++;
-		if (adapter->vfinfo[vf].primary_abort_count ==
+		vfinfo[vf].primary_abort_count++;
+		if (vfinfo[vf].primary_abort_count ==
 		    IXGBE_PRIMARY_ABORT_LIMIT) {
 			ixgbe_set_vf_link_state(adapter, vf,
 						IFLA_VF_LINK_STATE_DISABLE);
-			adapter->vfinfo[vf].primary_abort_count = 0;
+			vfinfo[vf].primary_abort_count = 0;
 
 			e_info(drv,
 			       "Malicious Driver Detection event detected on PF %d VF %d MAC: %pM mdd-disable-vf=on",
 			       hw->bus.func, vf,
-			       adapter->vfinfo[vf].vf_mac_addresses);
+			       vfinfo[vf].vf_mac_addresses);
 		}
 	}
+	rcu_read_unlock();
 }
 
 static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter)
@@ -8309,9 +8340,15 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter)
 
 	/* check status reg for all VFs owned by this PF */
 	for (vf = 0; vf < adapter->num_vfs; ++vf) {
-		struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
+		struct vf_data_storage *vfinfo;
+		struct pci_dev *vfdev = NULL;
 		u16 status_reg;
 
+		rcu_read_lock();
+		vfinfo = rcu_dereference(adapter->vfinfo);
+		if (vfinfo)
+			vfdev = vfinfo[vf].vfdev;
+		rcu_read_unlock();
 		if (!vfdev)
 			continue;
 		pci_read_config_word(vfdev, PCI_STATUS, &status_reg);
@@ -9744,17 +9781,23 @@ static int ixgbe_ndo_get_vf_stats(struct net_device *netdev, int vf,
 				  struct ifla_vf_stats *vf_stats)
 {
 	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
+	struct vf_data_storage *vfinfo;
 
 	if (vf < 0 || vf >= adapter->num_vfs)
 		return -EINVAL;
 
-	vf_stats->rx_packets = adapter->vfinfo[vf].vfstats.gprc;
-	vf_stats->rx_bytes   = adapter->vfinfo[vf].vfstats.gorc;
-	vf_stats->tx_packets = adapter->vfinfo[vf].vfstats.gptc;
-	vf_stats->tx_bytes   = adapter->vfinfo[vf].vfstats.gotc;
-	vf_stats->multicast  = adapter->vfinfo[vf].vfstats.mprc;
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo) {
+		vf_stats->rx_packets = vfinfo[vf].vfstats.gprc;
+		vf_stats->rx_bytes   = vfinfo[vf].vfstats.gorc;
+		vf_stats->tx_packets = vfinfo[vf].vfstats.gptc;
+		vf_stats->tx_bytes   = vfinfo[vf].vfstats.gotc;
+		vf_stats->multicast  = vfinfo[vf].vfstats.mprc;
+	}
+	rcu_read_unlock();
 
-	return 0;
+	return vfinfo ? 0 : -EINVAL;
 }
 
 #ifdef CONFIG_IXGBE_DCB
@@ -10071,20 +10114,26 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex,
 {
 	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
 	unsigned int num_vfs = adapter->num_vfs, vf;
+	struct vf_data_storage *vfinfo;
 	struct netdev_nested_priv priv;
 	struct upper_walk_data data;
 	struct net_device *upper;
 
 	/* redirect to a SRIOV VF */
-	for (vf = 0; vf < num_vfs; ++vf) {
-		upper = pci_get_drvdata(adapter->vfinfo[vf].vfdev);
-		if (upper->ifindex == ifindex) {
-			*queue = vf * __ALIGN_MASK(1, ~vmdq->mask);
-			*action = vf + 1;
-			*action <<= ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
-			return 0;
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo)
+		for (vf = 0; vf < num_vfs; ++vf) {
+			upper = pci_get_drvdata(vfinfo[vf].vfdev);
+			if (upper->ifindex == ifindex) {
+				*queue = vf * __ALIGN_MASK(1, ~vmdq->mask);
+				*action = vf + 1;
+				*action <<= ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+				rcu_read_unlock();
+				return 0;
+			}
 		}
-	}
+	rcu_read_unlock();
 
 	/* redirect to a offloaded macvlan netdev */
 	data.adapter = adapter;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 431d77da15a5..80f22a8e7af4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -44,7 +44,7 @@ static inline void ixgbe_alloc_vf_macvlans(struct ixgbe_adapter *adapter,
 			mv_list[i].free = true;
 			list_add(&mv_list[i].l, &adapter->vf_mvs.l);
 		}
-		adapter->mv_list = mv_list;
+		rcu_assign_pointer(adapter->mv_list, mv_list);
 	}
 }
 
@@ -52,6 +52,7 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
 				unsigned int num_vfs)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 	int i;
 
 	if (adapter->xdp_prog) {
@@ -64,14 +65,11 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
 			  IXGBE_FLAG_VMDQ_ENABLED;
 
 	/* Allocate memory for per VF control structures */
-	adapter->vfinfo = kzalloc_objs(struct vf_data_storage, num_vfs);
-	if (!adapter->vfinfo)
+	vfinfo = kzalloc_objs(struct vf_data_storage, num_vfs);
+	if (!vfinfo)
 		return -ENOMEM;
 
-	adapter->num_vfs = num_vfs;
-
 	ixgbe_alloc_vf_macvlans(adapter, num_vfs);
-	adapter->ring_feature[RING_F_VMDQ].offset = num_vfs;
 
 	/* Initialize default switching mode VEB */
 	IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
@@ -95,23 +93,27 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
 
 	for (i = 0; i < num_vfs; i++) {
 		/* enable spoof checking for all VFs */
-		adapter->vfinfo[i].spoofchk_enabled = true;
-		adapter->vfinfo[i].link_enable = true;
+		vfinfo[i].spoofchk_enabled = true;
+		vfinfo[i].link_enable = true;
 
 		/* We support VF RSS querying only for 82599 and x540
 		 * devices at the moment. These devices share RSS
 		 * indirection table and RSS hash key with PF therefore
 		 * we want to disable the querying by default.
 		 */
-		adapter->vfinfo[i].rss_query_enabled = false;
+		vfinfo[i].rss_query_enabled = false;
 
 		/* Untrust all VFs */
-		adapter->vfinfo[i].trusted = false;
+		vfinfo[i].trusted = false;
 
 		/* set the default xcast mode */
-		adapter->vfinfo[i].xcast_mode = IXGBEVF_XCAST_MODE_NONE;
+		vfinfo[i].xcast_mode = IXGBEVF_XCAST_MODE_NONE;
 	}
 
+	rcu_assign_pointer(adapter->vfinfo, vfinfo);
+	adapter->num_vfs = num_vfs;
+	adapter->ring_feature[RING_F_VMDQ].offset = num_vfs;
+
 	e_info(probe, "SR-IOV enabled with %d VFs\n", num_vfs);
 	return 0;
 }
@@ -123,6 +125,7 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
 static void ixgbe_get_vfs(struct ixgbe_adapter *adapter)
 {
 	struct pci_dev *pdev = adapter->pdev;
+	struct vf_data_storage *vfinfo;
 	u16 vendor = pdev->vendor;
 	struct pci_dev *vfdev;
 	int vf = 0;
@@ -134,18 +137,23 @@ static void ixgbe_get_vfs(struct ixgbe_adapter *adapter)
 		return;
 	pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id);
 
-	vfdev = pci_get_device(vendor, vf_id, NULL);
-	for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) {
-		if (!vfdev->is_virtfn)
-			continue;
-		if (vfdev->physfn != pdev)
-			continue;
-		if (vf >= adapter->num_vfs)
-			continue;
-		pci_dev_get(vfdev);
-		adapter->vfinfo[vf].vfdev = vfdev;
-		++vf;
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo) {
+		vfdev = pci_get_device(vendor, vf_id, NULL);
+		for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) {
+			if (!vfdev->is_virtfn)
+				continue;
+			if (vfdev->physfn != pdev)
+				continue;
+			if (vf >= adapter->num_vfs)
+				continue;
+			pci_dev_get(vfdev);
+			vfinfo[vf].vfdev = vfdev;
+			++vf;
+		}
 	}
+	rcu_read_unlock();
 }
 
 /* Note this function is called when the user wants to enable SR-IOV
@@ -206,31 +214,28 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
 {
 	unsigned int num_vfs = adapter->num_vfs, vf;
 	struct ixgbe_hw *hw = &adapter->hw;
-	unsigned long flags;
+	struct vf_data_storage *vfinfo;
+	struct vf_macvlans *mv_list;
 	int rss;
 
-	spin_lock_irqsave(&adapter->vfs_lock, flags);
-	/* set num VFs to 0 to prevent access to vfinfo */
+	/* set num VFs to 0 so readers bail out early */
 	adapter->num_vfs = 0;
-	spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+
+	vfinfo = rcu_replace_pointer(adapter->vfinfo, NULL, 1);
+	mv_list = rcu_replace_pointer(adapter->mv_list, NULL, 1);
 
 	/* put the reference to all of the vf devices */
 	for (vf = 0; vf < num_vfs; ++vf) {
-		struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
+		struct pci_dev *vfdev = vfinfo[vf].vfdev;
 
 		if (!vfdev)
 			continue;
-		adapter->vfinfo[vf].vfdev = NULL;
+		vfinfo[vf].vfdev = NULL;
 		pci_dev_put(vfdev);
 	}
 
-	/* free VF control structures */
-	kfree(adapter->vfinfo);
-	adapter->vfinfo = NULL;
-
-	/* free macvlan list */
-	kfree(adapter->mv_list);
-	adapter->mv_list = NULL;
+	kfree_rcu(vfinfo, rcu_head);
+	kfree_rcu(mv_list, rcu_head);
 
 	/* if SR-IOV is already disabled then there is nothing to do */
 	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
@@ -368,8 +373,8 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
 {
 	int entries = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]);
 	u16 *hash_list = (u16 *)&msgbuf[1];
-	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 	int i;
 	u32 vector_bit;
 	u32 vector_reg;
@@ -379,28 +384,34 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
 	/* only so many hash values supported */
 	entries = min(entries, IXGBE_MAX_VF_MC_ENTRIES);
 
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
 	/*
 	 * salt away the number of multi cast addresses assigned
 	 * to this VF for later use to restore when the PF multi cast
 	 * list changes
 	 */
-	vfinfo->num_vf_mc_hashes = entries;
+	vfinfo[vf].num_vf_mc_hashes = entries;
 
 	/*
 	 * VFs are limited to using the MTA hash table for their multicast
 	 * addresses
 	 */
 	for (i = 0; i < entries; i++) {
-		vfinfo->vf_mc_hashes[i] = hash_list[i];
+		vfinfo[vf].vf_mc_hashes[i] = hash_list[i];
 	}
 
-	for (i = 0; i < vfinfo->num_vf_mc_hashes; i++) {
-		vector_reg = (vfinfo->vf_mc_hashes[i] >> 5) & 0x7F;
-		vector_bit = vfinfo->vf_mc_hashes[i] & 0x1F;
+	for (i = 0; i < vfinfo[vf].num_vf_mc_hashes; i++) {
+		vector_reg = (vfinfo[vf].vf_mc_hashes[i] >> 5) & 0x7F;
+		vector_bit = vfinfo[vf].vf_mc_hashes[i] & 0x1F;
 		mta_reg = IXGBE_READ_REG(hw, IXGBE_MTA(vector_reg));
 		mta_reg |= BIT(vector_bit);
 		IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg);
 	}
+
 	vmolr |= IXGBE_VMOLR_ROMPE;
 	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
 
@@ -410,32 +421,39 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
 #ifdef CONFIG_PCI_IOV
 void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter)
 {
-	struct ixgbe_hw *hw = &adapter->hw;
 	struct vf_data_storage *vfinfo;
+	struct ixgbe_hw *hw = &adapter->hw;
 	int i, j;
 	u32 vector_bit;
 	u32 vector_reg;
 	u32 mta_reg;
 
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		goto no_vfs;
+
 	for (i = 0; i < adapter->num_vfs; i++) {
 		u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(i));
-		vfinfo = &adapter->vfinfo[i];
-		for (j = 0; j < vfinfo->num_vf_mc_hashes; j++) {
+		for (j = 0; j < vfinfo[i].num_vf_mc_hashes; j++) {
 			hw->addr_ctrl.mta_in_use++;
-			vector_reg = (vfinfo->vf_mc_hashes[j] >> 5) & 0x7F;
-			vector_bit = vfinfo->vf_mc_hashes[j] & 0x1F;
+			vector_reg = (vfinfo[i].vf_mc_hashes[j] >> 5) & 0x7F;
+			vector_bit = vfinfo[i].vf_mc_hashes[j] & 0x1F;
 			mta_reg = IXGBE_READ_REG(hw, IXGBE_MTA(vector_reg));
 			mta_reg |= BIT(vector_bit);
 			IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg);
 		}
 
-		if (vfinfo->num_vf_mc_hashes)
+		if (vfinfo[i].num_vf_mc_hashes)
 			vmolr |= IXGBE_VMOLR_ROMPE;
 		else
 			vmolr &= ~IXGBE_VMOLR_ROMPE;
 		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
 	}
 
+no_vfs:
+	rcu_read_unlock();
+
 	/* Restore any VF macvlans */
 	ixgbe_full_sync_mac_table(adapter);
 }
@@ -493,7 +511,9 @@ static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf
 	 */
 	if (adapter->hw.mac.type == ixgbe_mac_82599EB) {
 		struct net_device *dev = adapter->netdev;
+		unsigned int vf_api = ixgbe_mbox_api_10;
 		int pf_max_frame = dev->mtu + ETH_HLEN;
+		struct vf_data_storage *vfinfo;
 		u32 reg_offset, vf_shift, vfre;
 		int err = 0;
 
@@ -503,7 +523,12 @@ static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf
 					     IXGBE_FCOE_JUMBO_FRAME_SIZE);
 
 #endif /* CONFIG_FCOE */
-		switch (adapter->vfinfo[vf].vf_api) {
+		lockdep_assert_in_rcu_read_lock();
+		vfinfo = rcu_dereference(adapter->vfinfo);
+		if (vfinfo)
+			vf_api = vfinfo[vf].vf_api;
+
+		switch (vf_api) {
 		case ixgbe_mbox_api_11:
 		case ixgbe_mbox_api_12:
 		case ixgbe_mbox_api_13:
@@ -643,10 +668,16 @@ static void ixgbe_clear_vf_vlans(struct ixgbe_adapter *adapter, u32 vf)
 static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
 				int vf, int index, unsigned char *mac_addr)
 {
-	struct vf_macvlans *entry;
+	struct vf_macvlans *mv_list, *entry;
 	bool found = false;
 	int retval = 0;
 
+	lockdep_assert_in_rcu_read_lock();
+	/* vf_mvs entries point into the mv_list array */
+	mv_list = rcu_dereference(adapter->mv_list);
+	if (!mv_list)
+		return 0;
+
 	if (index <= 1) {
 		list_for_each_entry(entry, &adapter->vf_mvs.l, l) {
 			if (entry->vf == vf) {
@@ -700,7 +731,7 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
-	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+	struct vf_data_storage *vfinfo;
 	u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
 	u8 num_tcs = adapter->hw_tcs;
 	u32 reg_val;
@@ -709,31 +740,36 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 	/* remove VLAN filters belonging to this VF */
 	ixgbe_clear_vf_vlans(adapter, vf);
 
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return;
+
 	/* add back PF assigned VLAN or VLAN 0 */
-	ixgbe_set_vf_vlan(adapter, true, vfinfo->pf_vlan, vf);
+	ixgbe_set_vf_vlan(adapter, true, vfinfo[vf].pf_vlan, vf);
 
 	/* reset offloads to defaults */
-	ixgbe_set_vmolr(hw, vf, !vfinfo->pf_vlan);
+	ixgbe_set_vmolr(hw, vf, !vfinfo[vf].pf_vlan);
 
 	/* set outgoing tags for VFs */
-	if (!vfinfo->pf_vlan && !vfinfo->pf_qos && !num_tcs) {
+	if (!vfinfo[vf].pf_vlan && !vfinfo[vf].pf_qos && !num_tcs) {
 		ixgbe_clear_vmvir(adapter, vf);
 	} else {
-		if (vfinfo->pf_qos || !num_tcs)
-			ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
-					vfinfo->pf_qos, vf);
+		if (vfinfo[vf].pf_qos || !num_tcs)
+			ixgbe_set_vmvir(adapter, vfinfo[vf].pf_vlan,
+					vfinfo[vf].pf_qos, vf);
 		else
-			ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
+			ixgbe_set_vmvir(adapter, vfinfo[vf].pf_vlan,
 					adapter->default_up, vf);
 
-		if (vfinfo->spoofchk_enabled) {
+		if (vfinfo[vf].spoofchk_enabled) {
 			hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf);
 			hw->mac.ops.set_mac_anti_spoofing(hw, true, vf);
 		}
 	}
 
 	/* reset multicast table array for vf */
-	adapter->vfinfo[vf].num_vf_mc_hashes = 0;
+	vfinfo[vf].num_vf_mc_hashes = 0;
 
 	/* clear any ipsec table info */
 	ixgbe_ipsec_vf_clear(adapter, vf);
@@ -741,11 +777,11 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 	/* Flush and reset the mta with the new values */
 	ixgbe_set_rx_mode(adapter->netdev);
 
-	ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
+	ixgbe_del_mac_filter(adapter, vfinfo[vf].vf_mac_addresses, vf);
 	ixgbe_set_vf_macvlan(adapter, vf, 0, NULL);
 
 	/* reset VF api back to unknown */
-	adapter->vfinfo[vf].vf_api = ixgbe_mbox_api_10;
+	vfinfo[vf].vf_api = ixgbe_mbox_api_10;
 
 	/* Restart each queue for given VF */
 	for (queue = 0; queue < q_per_pool; queue++) {
@@ -780,16 +816,25 @@ static void ixgbe_vf_clear_mbx(struct ixgbe_adapter *adapter, u32 vf)
 static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
 			    int vf, unsigned char *mac_addr)
 {
+	struct vf_data_storage *vfinfo;
 	int retval;
 
-	ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	ixgbe_del_mac_filter(adapter, vfinfo[vf].vf_mac_addresses, vf);
 	retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
 	if (retval >= 0)
-		memcpy(adapter->vfinfo[vf].vf_mac_addresses, mac_addr,
+		memcpy(vfinfo[vf].vf_mac_addresses, mac_addr,
 		       ETH_ALEN);
 	else
-		eth_zero_addr(adapter->vfinfo[vf].vf_mac_addresses);
+		eth_zero_addr(vfinfo[vf].vf_mac_addresses);
 
+	rcu_read_unlock();
 	return retval;
 }
 
@@ -797,12 +842,17 @@ int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask)
 {
 	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
 	unsigned int vfn = (event_mask & 0x3f);
+	struct vf_data_storage *vfinfo;
 
 	bool enable = ((event_mask & 0x10000000U) != 0);
 
-	if (enable)
-		eth_zero_addr(adapter->vfinfo[vfn].vf_mac_addresses);
-
+	if (enable) {
+		rcu_read_lock();
+		vfinfo = rcu_dereference(adapter->vfinfo);
+		if (vfinfo)
+			eth_zero_addr(vfinfo[vfn].vf_mac_addresses);
+		rcu_read_unlock();
+	}
 	return 0;
 }
 
@@ -838,6 +888,7 @@ static void ixgbe_set_vf_rx_tx(struct ixgbe_adapter *adapter, int vf)
 {
 	u32 reg_cur_tx, reg_cur_rx, reg_req_tx, reg_req_rx;
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 	u32 reg_offset, vf_shift;
 
 	vf_shift = vf % 32;
@@ -846,7 +897,9 @@ static void ixgbe_set_vf_rx_tx(struct ixgbe_adapter *adapter, int vf)
 	reg_cur_tx = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset));
 	reg_cur_rx = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset));
 
-	if (adapter->vfinfo[vf].link_enable) {
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo && vfinfo[vf].link_enable) {
 		reg_req_tx = reg_cur_tx | 1 << vf_shift;
 		reg_req_rx = reg_cur_rx | 1 << vf_shift;
 	} else {
@@ -882,11 +935,12 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
 {
 	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
 	struct ixgbe_hw *hw = &adapter->hw;
-	unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses;
+	struct vf_data_storage *vfinfo;
 	u32 reg, reg_offset, vf_shift;
 	u32 msgbuf[4] = {0, 0, 0, 0};
 	u8 *addr = (u8 *)(&msgbuf[1]);
 	u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+	unsigned char *vf_mac;
 	int i;
 
 	e_info(probe, "VF Reset msg received from vf %d\n", vf);
@@ -896,6 +950,13 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
 
 	ixgbe_vf_clear_mbx(adapter, vf);
 
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
+	vf_mac = vfinfo[vf].vf_mac_addresses;
+
 	/* set vf mac address */
 	if (!is_zero_ether_addr(vf_mac))
 		ixgbe_set_vf_mac(adapter, vf, vf_mac);
@@ -905,7 +966,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
 
 	/* force drop enable for all VF Rx queues */
 	reg = IXGBE_QDE_ENABLE;
-	if (adapter->vfinfo[vf].pf_vlan)
+	if (vfinfo[vf].pf_vlan)
 		reg |= IXGBE_QDE_HIDE_VLAN;
 
 	ixgbe_write_qde(adapter, vf, reg);
@@ -913,7 +974,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
 	ixgbe_set_vf_rx_tx(adapter, vf);
 
 	/* enable VF mailbox for further messages */
-	adapter->vfinfo[vf].clear_to_send = true;
+	vfinfo[vf].clear_to_send = true;
 
 	/* Enable counting of spoofed packets in the SSVPC register */
 	reg = IXGBE_READ_REG(hw, IXGBE_VMECM(reg_offset));
@@ -931,7 +992,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
 
 	/* reply to reset with ack and vf mac address */
 	msgbuf[0] = IXGBE_VF_RESET;
-	if (!is_zero_ether_addr(vf_mac) && adapter->vfinfo[vf].pf_set_mac) {
+	if (!is_zero_ether_addr(vf_mac) && vfinfo[vf].pf_set_mac) {
 		msgbuf[0] |= IXGBE_VT_MSGTYPE_ACK;
 		memcpy(addr, vf_mac, ETH_ALEN);
 	} else {
@@ -952,14 +1013,20 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter,
 				 u32 *msgbuf, u32 vf)
 {
 	u8 *new_mac = ((u8 *)(&msgbuf[1]));
+	struct vf_data_storage *vfinfo;
 
 	if (!is_valid_ether_addr(new_mac)) {
 		e_warn(drv, "VF %d attempted to set invalid mac\n", vf);
 		return -1;
 	}
 
-	if (adapter->vfinfo[vf].pf_set_mac && !adapter->vfinfo[vf].trusted &&
-	    !ether_addr_equal(adapter->vfinfo[vf].vf_mac_addresses, new_mac)) {
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
+	if (vfinfo[vf].pf_set_mac && !vfinfo[vf].trusted &&
+	    !ether_addr_equal(vfinfo[vf].vf_mac_addresses, new_mac)) {
 		e_warn(drv,
 		       "VF %d attempted to override administratively set MAC address\n"
 		       "Reload the VF driver to resume operations\n",
@@ -975,9 +1042,15 @@ static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter,
 {
 	u32 add = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]);
 	u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK);
+	struct vf_data_storage *vfinfo;
 	u8 tcs = adapter->hw_tcs;
 
-	if (adapter->vfinfo[vf].pf_vlan || tcs) {
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
+	if (vfinfo[vf].pf_vlan || tcs) {
 		e_warn(drv,
 		       "VF %d attempted to override administratively set VLAN configuration\n"
 		       "Reload the VF driver to resume operations\n",
@@ -997,9 +1070,15 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
 {
 	u8 *new_mac = ((u8 *)(&msgbuf[1]));
 	int index = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]);
+	struct vf_data_storage *vfinfo;
 	int err;
 
-	if (adapter->vfinfo[vf].pf_set_mac && !adapter->vfinfo[vf].trusted &&
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
+	if (vfinfo[vf].pf_set_mac && !vfinfo[vf].trusted &&
 	    index > 0) {
 		e_warn(drv,
 		       "VF %d requested MACVLAN filter but is administratively denied\n",
@@ -1018,7 +1097,7 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
 		 * If the VF is allowed to set MAC filters then turn off
 		 * anti-spoofing to avoid false positives.
 		 */
-		if (adapter->vfinfo[vf].spoofchk_enabled) {
+		if (vfinfo[vf].spoofchk_enabled) {
 			struct ixgbe_hw *hw = &adapter->hw;
 
 			hw->mac.ops.set_mac_anti_spoofing(hw, false, vf);
@@ -1038,6 +1117,7 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
 static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter,
 				  u32 *msgbuf, u32 vf)
 {
+	struct vf_data_storage *vfinfo;
 	int api = msgbuf[1];
 
 	switch (api) {
@@ -1048,7 +1128,10 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter,
 	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_16:
 	case ixgbe_mbox_api_17:
-		adapter->vfinfo[vf].vf_api = api;
+		lockdep_assert_in_rcu_read_lock();
+		vfinfo = rcu_dereference(adapter->vfinfo);
+		if (vfinfo)
+			vfinfo[vf].vf_api = api;
 		return 0;
 	default:
 		break;
@@ -1064,11 +1147,17 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
 {
 	struct net_device *dev = adapter->netdev;
 	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	struct vf_data_storage *vfinfo;
 	unsigned int default_tc = 0;
 	u8 num_tcs = adapter->hw_tcs;
 
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
 	/* verify the PF is supporting the correct APIs */
-	switch (adapter->vfinfo[vf].vf_api) {
+	switch (vfinfo[vf].vf_api) {
 	case ixgbe_mbox_api_20:
 	case ixgbe_mbox_api_11:
 	case ixgbe_mbox_api_12:
@@ -1092,7 +1181,7 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
 	/* notify VF of need for VLAN tag stripping, and correct queue */
 	if (num_tcs)
 		msgbuf[IXGBE_VF_TRANS_VLAN] = num_tcs;
-	else if (adapter->vfinfo[vf].pf_vlan || adapter->vfinfo[vf].pf_qos)
+	else if (vfinfo[vf].pf_vlan || vfinfo[vf].pf_qos)
 		msgbuf[IXGBE_VF_TRANS_VLAN] = 1;
 	else
 		msgbuf[IXGBE_VF_TRANS_VLAN] = 0;
@@ -1105,17 +1194,23 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
 
 static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 {
-	u32 i, j;
-	u32 *out_buf = &msgbuf[1];
-	const u8 *reta = adapter->rss_indir_tbl;
 	u32 reta_size = ixgbe_rss_indir_tbl_entries(adapter);
+	const u8 *reta = adapter->rss_indir_tbl;
+	struct vf_data_storage *vfinfo;
+	u32 *out_buf = &msgbuf[1];
+	u32 i, j;
+
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
 
 	/* Check if operation is permitted */
-	if (!adapter->vfinfo[vf].rss_query_enabled)
+	if (!vfinfo[vf].rss_query_enabled)
 		return -EPERM;
 
 	/* verify the PF is supporting the correct API */
-	switch (adapter->vfinfo[vf].vf_api) {
+	switch (vfinfo[vf].vf_api) {
 	case ixgbe_mbox_api_17:
 	case ixgbe_mbox_api_16:
 	case ixgbe_mbox_api_14:
@@ -1143,14 +1238,20 @@ static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 static int ixgbe_get_vf_rss_key(struct ixgbe_adapter *adapter,
 				u32 *msgbuf, u32 vf)
 {
+	struct vf_data_storage *vfinfo;
 	u32 *rss_key = &msgbuf[1];
 
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
 	/* Check if the operation is permitted */
-	if (!adapter->vfinfo[vf].rss_query_enabled)
+	if (!vfinfo[vf].rss_query_enabled)
 		return -EPERM;
 
 	/* verify the PF is supporting the correct API */
-	switch (adapter->vfinfo[vf].vf_api) {
+	switch (vfinfo[vf].vf_api) {
 	case ixgbe_mbox_api_17:
 	case ixgbe_mbox_api_16:
 	case ixgbe_mbox_api_14:
@@ -1170,11 +1271,17 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter,
 				      u32 *msgbuf, u32 vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 	int xcast_mode = msgbuf[1];
 	u32 vmolr, fctrl, disable, enable;
 
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
 	/* verify the PF is supporting the correct APIs */
-	switch (adapter->vfinfo[vf].vf_api) {
+	switch (vfinfo[vf].vf_api) {
 	case ixgbe_mbox_api_12:
 		/* promisc introduced in 1.3 version */
 		if (xcast_mode == IXGBEVF_XCAST_MODE_PROMISC)
@@ -1190,11 +1297,11 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter,
 	}
 
 	if (xcast_mode > IXGBEVF_XCAST_MODE_MULTI &&
-	    !adapter->vfinfo[vf].trusted) {
+	    !vfinfo[vf].trusted) {
 		xcast_mode = IXGBEVF_XCAST_MODE_MULTI;
 	}
 
-	if (adapter->vfinfo[vf].xcast_mode == xcast_mode)
+	if (vfinfo[vf].xcast_mode == xcast_mode)
 		goto out;
 
 	switch (xcast_mode) {
@@ -1236,7 +1343,7 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter,
 	vmolr |= enable;
 	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
 
-	adapter->vfinfo[vf].xcast_mode = xcast_mode;
+	vfinfo[vf].xcast_mode = xcast_mode;
 
 out:
 	msgbuf[1] = xcast_mode;
@@ -1247,10 +1354,16 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter,
 static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter,
 				   u32 *msgbuf, u32 vf)
 {
+	struct vf_data_storage *vfinfo;
 	u32 *link_state = &msgbuf[1];
 
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
 	/* verify the PF is supporting the correct API */
-	switch (adapter->vfinfo[vf].vf_api) {
+	switch (vfinfo[vf].vf_api) {
 	case ixgbe_mbox_api_12:
 	case ixgbe_mbox_api_13:
 	case ixgbe_mbox_api_14:
@@ -1261,7 +1374,7 @@ static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter,
 		return -EOPNOTSUPP;
 	}
 
-	*link_state = adapter->vfinfo[vf].link_enable;
+	*link_state = vfinfo[vf].link_enable;
 
 	return 0;
 }
@@ -1280,8 +1393,14 @@ static int ixgbe_send_vf_link_status(struct ixgbe_adapter *adapter,
 				     u32 *msgbuf, u32 vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
+
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
 
-	switch (adapter->vfinfo[vf].vf_api) {
+	switch (vfinfo[vf].vf_api) {
 	case ixgbe_mbox_api_16:
 	case ixgbe_mbox_api_17:
 		if (hw->mac.type != ixgbe_mac_e610)
@@ -1310,9 +1429,15 @@ static int ixgbe_send_vf_link_status(struct ixgbe_adapter *adapter,
 static int ixgbe_negotiate_vf_features(struct ixgbe_adapter *adapter,
 				       u32 *msgbuf, u32 vf)
 {
+	struct vf_data_storage *vfinfo;
 	u32 features = msgbuf[1];
 
-	switch (adapter->vfinfo[vf].vf_api) {
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
+	switch (vfinfo[vf].vf_api) {
 	case ixgbe_mbox_api_17:
 		break;
 	default:
@@ -1330,6 +1455,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 	u32 mbx_size = IXGBE_VFMAILBOX_SIZE;
 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 	int retval;
 
 	retval = ixgbe_read_mbx(hw, msgbuf, mbx_size, vf);
@@ -1349,11 +1475,16 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 	if (msgbuf[0] == IXGBE_VF_RESET)
 		return ixgbe_vf_reset_msg(adapter, vf);
 
+	lockdep_assert_in_rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		return 0;
+
 	/*
 	 * until the vf completes a virtual function reset it should not be
 	 * allowed to start any configuration.
 	 */
-	if (!adapter->vfinfo[vf].clear_to_send) {
+	if (!vfinfo[vf].clear_to_send) {
 		msgbuf[0] |= IXGBE_VT_MSGTYPE_NACK;
 		ixgbe_write_mbx(hw, msgbuf, 1, vf);
 		return 0;
@@ -1426,11 +1557,12 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 
 static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 {
+	struct vf_data_storage *vfinfo = rcu_dereference(adapter->vfinfo);
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 msg = IXGBE_VT_MSGTYPE_NACK;
 
 	/* if device isn't clear to send it shouldn't be reading either */
-	if (!adapter->vfinfo[vf].clear_to_send)
+	if (vfinfo && !vfinfo[vf].clear_to_send)
 		ixgbe_write_mbx(hw, &msg, 1, vf);
 }
 
@@ -1462,15 +1594,21 @@ bool ixgbe_check_mdd_event(struct ixgbe_adapter *adapter)
 			 IXGBE_READ_REG(hw, IXGBE_LVMMC_RX));
 
 		if (hw->mac.ops.restore_mdd_vf) {
+			struct vf_data_storage *vfinfo;
 			u32 ping;
 
 			hw->mac.ops.restore_mdd_vf(hw, i);
 
 			/* get the VF to rebuild its queues */
-			adapter->vfinfo[i].clear_to_send = 0;
-			ping = IXGBE_PF_CONTROL_MSG |
-			       IXGBE_VT_MSGTYPE_CTS;
-			ixgbe_write_mbx(hw, &ping, 1, i);
+			rcu_read_lock();
+			vfinfo = rcu_dereference(adapter->vfinfo);
+			if (vfinfo) {
+				vfinfo[i].clear_to_send = false;
+				ping = IXGBE_PF_CONTROL_MSG |
+				       IXGBE_VT_MSGTYPE_CTS;
+				ixgbe_write_mbx(hw, &ping, 1, i);
+			}
+			rcu_read_unlock();
 		}
 
 		ret = true;
@@ -1482,12 +1620,11 @@ bool ixgbe_check_mdd_event(struct ixgbe_adapter *adapter)
 void ixgbe_msg_task(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	unsigned long flags;
 	u32 vf;
 
 	ixgbe_check_mdd_event(adapter);
 
-	spin_lock_irqsave(&adapter->vfs_lock, flags);
+	rcu_read_lock();
 	for (vf = 0; vf < adapter->num_vfs; vf++) {
 		/* process any reset requests */
 		if (!ixgbe_check_for_rst(hw, vf))
@@ -1501,7 +1638,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter)
 		if (!ixgbe_check_for_ack(hw, vf))
 			ixgbe_rcv_ack_from_vf(adapter, vf);
 	}
-	spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+	rcu_read_unlock();
 }
 
 static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf)
@@ -1510,23 +1647,26 @@ static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf)
 	u32 ping;
 
 	ping = IXGBE_PF_CONTROL_MSG;
-	if (adapter->vfinfo[vf].clear_to_send)
-		ping |= IXGBE_VT_MSGTYPE_CTS;
 	ixgbe_write_mbx(hw, &ping, 1, vf);
 }
 
 void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 	u32 ping;
 	int i;
 
-	for (i = 0 ; i < adapter->num_vfs; i++) {
-		ping = IXGBE_PF_CONTROL_MSG;
-		if (adapter->vfinfo[i].clear_to_send)
-			ping |= IXGBE_VT_MSGTYPE_CTS;
-		ixgbe_write_mbx(hw, &ping, 1, i);
-	}
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo)
+		for (i = 0 ; i < adapter->num_vfs; i++) {
+			ping = IXGBE_PF_CONTROL_MSG;
+			if (vfinfo[i].clear_to_send)
+				ping |= IXGBE_VT_MSGTYPE_CTS;
+			ixgbe_write_mbx(hw, &ping, 1, i);
+		}
+	rcu_read_unlock();
 }
 
 /**
@@ -1537,21 +1677,34 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter)
  **/
 void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter)
 {
+	struct vf_data_storage *vfinfo;
 	int i;
 
-	for (i = 0 ; i < adapter->num_vfs; i++)
-		ixgbe_set_vf_link_state(adapter, i,
-					adapter->vfinfo[i].link_state);
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo)
+		for (i = 0 ; i < adapter->num_vfs; i++)
+			ixgbe_set_vf_link_state(adapter, i,
+						vfinfo[i].link_state);
+	rcu_read_unlock();
 }
 
 int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 {
 	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
+	struct vf_data_storage *vfinfo;
 	int retval;
 
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
 
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo) {
+		rcu_read_unlock();
+		return 0;
+	}
+
 	if (is_valid_ether_addr(mac)) {
 		dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n",
 			 mac, vf);
@@ -1559,7 +1712,7 @@ int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 
 		retval = ixgbe_set_vf_mac(adapter, vf, mac);
 		if (retval >= 0) {
-			adapter->vfinfo[vf].pf_set_mac = true;
+			vfinfo[vf].pf_set_mac = true;
 
 			if (test_bit(__IXGBE_DOWN, &adapter->state)) {
 				dev_warn(&adapter->pdev->dev, "The VF MAC address has been set, but the PF device is not up.\n");
@@ -1569,18 +1722,19 @@ int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 			dev_warn(&adapter->pdev->dev, "The VF MAC address was NOT set due to invalid or duplicate MAC address.\n");
 		}
 	} else if (is_zero_ether_addr(mac)) {
-		unsigned char *vf_mac_addr =
-					   adapter->vfinfo[vf].vf_mac_addresses;
+		unsigned char *vf_mac_addr = vfinfo[vf].vf_mac_addresses;
 
 		/* nothing to do */
-		if (is_zero_ether_addr(vf_mac_addr))
+		if (is_zero_ether_addr(vf_mac_addr)) {
+			rcu_read_unlock();
 			return 0;
+		}
 
 		dev_info(&adapter->pdev->dev, "removing MAC on VF %d\n", vf);
 
 		retval = ixgbe_del_mac_filter(adapter, vf_mac_addr, vf);
 		if (retval >= 0) {
-			adapter->vfinfo[vf].pf_set_mac = false;
+			vfinfo[vf].pf_set_mac = false;
 			memcpy(vf_mac_addr, mac, ETH_ALEN);
 		} else {
 			dev_warn(&adapter->pdev->dev, "Could NOT remove the VF MAC address.\n");
@@ -1589,10 +1743,12 @@ int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 		retval = -EINVAL;
 	}
 
+	rcu_read_unlock();
 	return retval;
 }
 
 static int ixgbe_enable_port_vlan(struct ixgbe_adapter *adapter, int vf,
+				  struct vf_data_storage *vfinfo,
 				  u16 vlan, u8 qos)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -1613,8 +1769,8 @@ static int ixgbe_enable_port_vlan(struct ixgbe_adapter *adapter, int vf,
 		ixgbe_write_qde(adapter, vf, IXGBE_QDE_ENABLE |
 				IXGBE_QDE_HIDE_VLAN);
 
-	adapter->vfinfo[vf].pf_vlan = vlan;
-	adapter->vfinfo[vf].pf_qos = qos;
+	vfinfo[vf].pf_vlan = vlan;
+	vfinfo[vf].pf_qos = qos;
 	dev_info(&adapter->pdev->dev,
 		 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
 	if (test_bit(__IXGBE_DOWN, &adapter->state)) {
@@ -1628,13 +1784,14 @@ static int ixgbe_enable_port_vlan(struct ixgbe_adapter *adapter, int vf,
 	return err;
 }
 
-static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf)
+static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf,
+				   struct vf_data_storage *vfinfo)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	int err;
 
 	err = ixgbe_set_vf_vlan(adapter, false,
-				adapter->vfinfo[vf].pf_vlan, vf);
+				vfinfo[vf].pf_vlan, vf);
 	/* Restore tagless access via VLAN 0 */
 	ixgbe_set_vf_vlan(adapter, true, 0, vf);
 	ixgbe_clear_vmvir(adapter, vf);
@@ -1644,8 +1801,8 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf)
 	if (hw->mac.type >= ixgbe_mac_X550)
 		ixgbe_write_qde(adapter, vf, IXGBE_QDE_ENABLE);
 
-	adapter->vfinfo[vf].pf_vlan = 0;
-	adapter->vfinfo[vf].pf_qos = 0;
+	vfinfo[vf].pf_vlan = 0;
+	vfinfo[vf].pf_qos = 0;
 
 	return err;
 }
@@ -1653,13 +1810,20 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf)
 int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 			  u8 qos, __be16 vlan_proto)
 {
-	int err = 0;
 	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
+	struct vf_data_storage *vfinfo;
+	int err = 0;
 
 	if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7))
 		return -EINVAL;
 	if (vlan_proto != htons(ETH_P_8021Q))
 		return -EPROTONOSUPPORT;
+
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo)
+		goto out;
+
 	if (vlan || qos) {
 		/* Check if there is already a port VLAN set, if so
 		 * we have to delete the old one first before we
@@ -1668,16 +1832,17 @@ int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 		 * old port VLAN before setting a new one but this
 		 * is not necessarily the case.
 		 */
-		if (adapter->vfinfo[vf].pf_vlan)
-			err = ixgbe_disable_port_vlan(adapter, vf);
+		if (vfinfo[vf].pf_vlan)
+			err = ixgbe_disable_port_vlan(adapter, vf, vfinfo);
 		if (err)
 			goto out;
-		err = ixgbe_enable_port_vlan(adapter, vf, vlan, qos);
+		err = ixgbe_enable_port_vlan(adapter, vf, vfinfo, vlan, qos);
 	} else {
-		err = ixgbe_disable_port_vlan(adapter, vf);
+		err = ixgbe_disable_port_vlan(adapter, vf, vfinfo);
 	}
 
 out:
+	rcu_read_unlock();
 	return err;
 }
 
@@ -1695,13 +1860,13 @@ int ixgbe_link_mbps(struct ixgbe_adapter *adapter)
 	}
 }
 
-static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
+static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf,
+				    u16 tx_rate)
 {
 	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 bcnrc_val = 0;
 	u16 queue, queues_per_pool;
-	u16 tx_rate = adapter->vfinfo[vf].tx_rate;
 
 	if (tx_rate) {
 		/* start with base link speed value */
@@ -1749,6 +1914,7 @@ static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
 
 void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter)
 {
+	struct vf_data_storage *vfinfo;
 	int i;
 
 	/* VF Tx rate limit was not set */
@@ -1761,18 +1927,23 @@ void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter)
 			 "Link speed has been changed. VF Transmit rate is disabled\n");
 	}
 
-	for (i = 0; i < adapter->num_vfs; i++) {
-		if (!adapter->vf_rate_link_speed)
-			adapter->vfinfo[i].tx_rate = 0;
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo)
+		for (i = 0; i < adapter->num_vfs; i++) {
+			if (!adapter->vf_rate_link_speed)
+				vfinfo[i].tx_rate = 0;
 
-		ixgbe_set_vf_rate_limit(adapter, i);
-	}
+			ixgbe_set_vf_rate_limit(adapter, i, vfinfo[i].tx_rate);
+		}
+	rcu_read_unlock();
 }
 
 int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
 			int max_tx_rate)
 {
 	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
+	struct vf_data_storage *vfinfo;
 	int link_speed;
 
 	/* verify VF is active */
@@ -1795,12 +1966,17 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
 	if (max_tx_rate && ((max_tx_rate <= 10) || (max_tx_rate > link_speed)))
 		return -EINVAL;
 
-	/* store values */
-	adapter->vf_rate_link_speed = link_speed;
-	adapter->vfinfo[vf].tx_rate = max_tx_rate;
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo) {
+		/* store values */
+		adapter->vf_rate_link_speed = link_speed;
+		vfinfo[vf].tx_rate = max_tx_rate;
 
-	/* update hardware configuration */
-	ixgbe_set_vf_rate_limit(adapter, vf);
+		/* update hardware configuration */
+		ixgbe_set_vf_rate_limit(adapter, vf, vfinfo[vf].tx_rate);
+	}
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -1809,11 +1985,18 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting)
 {
 	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
 
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
 
-	adapter->vfinfo[vf].spoofchk_enabled = setting;
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo)
+		vfinfo[vf].spoofchk_enabled = setting;
+	rcu_read_unlock();
+	if (!vfinfo)
+		return 0;
 
 	/* configure MAC spoofing */
 	hw->mac.ops.set_mac_anti_spoofing(hw, setting, vf);
@@ -1851,28 +2034,37 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting)
  **/
 void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state)
 {
-	adapter->vfinfo[vf].link_state = state;
+	struct vf_data_storage *vfinfo;
+
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo) {
+		rcu_read_unlock();
+		return;
+	}
+	vfinfo[vf].link_state = state;
 
 	switch (state) {
 	case IFLA_VF_LINK_STATE_AUTO:
 		if (test_bit(__IXGBE_DOWN, &adapter->state))
-			adapter->vfinfo[vf].link_enable = false;
+			vfinfo[vf].link_enable = false;
 		else
-			adapter->vfinfo[vf].link_enable = true;
+			vfinfo[vf].link_enable = true;
 		break;
 	case IFLA_VF_LINK_STATE_ENABLE:
-		adapter->vfinfo[vf].link_enable = true;
+		vfinfo[vf].link_enable = true;
 		break;
 	case IFLA_VF_LINK_STATE_DISABLE:
-		adapter->vfinfo[vf].link_enable = false;
+		vfinfo[vf].link_enable = false;
 		break;
 	}
 
 	ixgbe_set_vf_rx_tx(adapter, vf);
 
 	/* restart the VF */
-	adapter->vfinfo[vf].clear_to_send = false;
+	vfinfo[vf].clear_to_send = false;
 	ixgbe_ping_vf(adapter, vf);
+	rcu_read_unlock();
 }
 
 /**
@@ -1923,6 +2115,7 @@ int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf,
 				  bool setting)
 {
 	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
+	struct vf_data_storage *vfinfo;
 
 	/* This operation is currently supported only for 82599 and x540
 	 * devices.
@@ -1934,7 +2127,11 @@ int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf,
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
 
-	adapter->vfinfo[vf].rss_query_enabled = setting;
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (vfinfo)
+		vfinfo[vf].rss_query_enabled = setting;
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -1942,18 +2139,31 @@ int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf,
 int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
 {
 	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
+	struct vf_data_storage *vfinfo;
 
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
 
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo) {
+		rcu_read_unlock();
+		return 0;
+	}
+
 	/* nothing to do */
-	if (adapter->vfinfo[vf].trusted == setting)
+	if (vfinfo[vf].trusted == setting) {
+		rcu_read_unlock();
 		return 0;
+	}
 
-	adapter->vfinfo[vf].trusted = setting;
+	vfinfo[vf].trusted = setting;
 
 	/* reset VF to reconfigure features */
-	adapter->vfinfo[vf].clear_to_send = false;
+	vfinfo[vf].clear_to_send = false;
+
+	rcu_read_unlock();
+
 	ixgbe_ping_vf(adapter, vf);
 
 	e_info(drv, "VF %u is %strusted\n", vf, setting ? "" : "not ");
@@ -1965,17 +2175,30 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev,
 			    int vf, struct ifla_vf_info *ivi)
 {
 	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
+	struct vf_data_storage *vfinfo;
+
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
 	ivi->vf = vf;
-	memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN);
-	ivi->max_tx_rate = adapter->vfinfo[vf].tx_rate;
+
+	rcu_read_lock();
+	vfinfo = rcu_dereference(adapter->vfinfo);
+	if (!vfinfo) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	memcpy(&ivi->mac, vfinfo[vf].vf_mac_addresses, ETH_ALEN);
+	ivi->max_tx_rate = vfinfo[vf].tx_rate;
 	ivi->min_tx_rate = 0;
-	ivi->vlan = adapter->vfinfo[vf].pf_vlan;
-	ivi->qos = adapter->vfinfo[vf].pf_qos;
-	ivi->spoofchk = adapter->vfinfo[vf].spoofchk_enabled;
-	ivi->rss_query_en = adapter->vfinfo[vf].rss_query_enabled;
-	ivi->trusted = adapter->vfinfo[vf].trusted;
-	ivi->linkstate = adapter->vfinfo[vf].link_state;
+	ivi->vlan = vfinfo[vf].pf_vlan;
+	ivi->qos = vfinfo[vf].pf_qos;
+	ivi->spoofchk = vfinfo[vf].spoofchk_enabled;
+	ivi->rss_query_en = vfinfo[vf].rss_query_enabled;
+	ivi->trusted = vfinfo[vf].trusted;
+	ivi->linkstate = vfinfo[vf].link_state;
+
+	rcu_read_unlock();
+
 	return 0;
 }
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH net] net: airoha: Fix possible TX queue stall in airoha_qdma_tx_napi_poll()
From: Paolo Abeni @ 2026-04-16  8:44 UTC (permalink / raw)
  To: Lorenzo Bianconi, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski
  Cc: linux-arm-kernel, linux-mediatek, netdev
In-Reply-To: <20260413-airoha-txq-potential-stall-v1-1-7830363b1543@kernel.org>

On 4/13/26 10:29 AM, Lorenzo Bianconi wrote:
> Since multiple net_device TX queues can share the same hw QDMA TX queue,
> there is no guarantee we have inflight packets queued in hw belonging to a
> net_device TX queue stopped in the xmit path because hw QDMA TX queue
> can be full. In this corner case the net_device TX queue will never be
> re-activated. In order to avoid any potential net_device TX queue stall,
> we need to wake all the net_device TX queues feeding the same hw QDMA TX
> queue in airoha_qdma_tx_napi_poll routine.
> 
> Fixes: 23020f0493270 ("net: airoha: Introduce ethernet support for EN7581 SoC")
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  drivers/net/ethernet/airoha/airoha_eth.c | 30 ++++++++++++++++++++++++++----
>  1 file changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
> index 9e995094c32a..e7610f36b8e4 100644
> --- a/drivers/net/ethernet/airoha/airoha_eth.c
> +++ b/drivers/net/ethernet/airoha/airoha_eth.c
> @@ -855,6 +855,19 @@ static int airoha_qdma_init_rx(struct airoha_qdma *qdma)
>  	return 0;
>  }
>  
> +static void airoha_qdma_wake_tx_queues(struct airoha_qdma *qdma)
> +{
> +	struct airoha_eth *eth = qdma->eth;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(eth->ports); i++) {
> +		struct airoha_gdm_port *port = eth->ports[i];
> +
> +		if (port && port->qdma == qdma)
> +			netif_tx_wake_all_queues(port->dev);
> +	}
> +}
> +
>  static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget)
>  {
>  	struct airoha_tx_irq_queue *irq_q;
> @@ -931,12 +944,21 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget)
>  
>  			txq = netdev_get_tx_queue(skb->dev, queue);
>  			netdev_tx_completed_queue(txq, 1, skb->len);
> -			if (netif_tx_queue_stopped(txq) &&
> -			    q->ndesc - q->queued >= q->free_thr)
> -				netif_tx_wake_queue(txq);
> -
>  			dev_kfree_skb_any(skb);
>  		}
> +
> +		if (q->ndesc - q->queued == q->free_thr) {

Sashiko says:

---
Can this exact equality check cause a permanent TX queue stall?
The previous logic checked if the free space was greater than or equal
to q->free_thr. If the xmit path stops the queue because the free space
drops to exactly q->free_thr, the hardware queue will have exactly
q->free_thr free slots.
When the NAPI poll routine subsequently reaps a completed descriptor,
q->queued is decremented, increasing the free space to q->free_thr + 1.
Since the free space is no longer exactly equal to the threshold, this
condition evaluates to false.
As NAPI continues to reap more descriptors, the free space strictly
increases, meaning the exact equality check will never evaluate to true
and the netdev TX queue will remain permanently stalled.
---

Please, try to triage sashiko comments proactively. Especially on NIC
drivers, validating the AI statements is extremely cumbersome for the
maintainers.

Thanks,

Paolo


^ permalink raw reply

* Re: [PATCH net-next v2 13/14] net: macb: use context swapping in .set_ringparam()
From: Théo Lebrun @ 2026-04-16  8:54 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Nicolas Ferre, Claudiu Beznea, Andrew Lunn, David S. Miller,
	Eric Dumazet, Paolo Abeni, Richard Cochran, Russell King,
	Paolo Valerio, Conor Dooley, Nicolai Buchwitz,
	Vladimir Kondratiev, Gregory CLEMENT, Benoît Monin,
	Tawfik Bayouk, Thomas Petazzoni, Maxime Chevallier, netdev,
	linux-kernel
In-Reply-To: <20260413175040.352378c5@kernel.org>

Hello Jakub,

On Tue Apr 14, 2026 at 2:50 AM CEST, Jakub Kicinski wrote:
> On Fri, 10 Apr 2026 21:52:01 +0200 Théo Lebrun wrote:
>> ethtool_ops.set_ringparam() is implemented using the primitive close /
>> update ring size / reopen sequence. Under memory pressure this does not
>> fly: we free our buffers at close and cannot reallocate new ones at
>> open. Also, it triggers a slow PHY reinit.
>> 
>> Instead, exploit the new context mechanism and improve our sequence to:
>>  - allocate a new context (including buffers) first
>>  - if it fails, early return without any impact to the interface
>>  - stop interface
>>  - update global state (bp, netdev, etc)
>>  - pass buffer pointers to the hardware
>>  - start interface
>>  - free old context.
>> 
>> The HW disable sequence is inspired by macb_reset_hw() but avoids
>> (1) setting NCR bit CLRSTAT and (2) clearing register PBUFRXCUT.
>> 
>> The HW re-enable sequence is inspired by macb_mac_link_up(), skipping
>> over register writes which would be redundant (because values have not
>> changed).
>> 
>> The generic context swapping parts are isolated into helper functions
>> macb_context_swap_start|end(), reusable by other operations (change_mtu,
>> set_channels, etc).
>
>> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
>> index 81beb67b206a..340ae7d881c6 100644
>> --- a/drivers/net/ethernet/cadence/macb_main.c
>> +++ b/drivers/net/ethernet/cadence/macb_main.c
>> @@ -3081,6 +3081,89 @@ static void macb_configure_dma(struct macb *bp)
>>  	}
>>  }
>>  
>> +static void macb_context_swap_start(struct macb *bp)
>> +{
>> +	struct macb_queue *queue;
>> +	unsigned long flags;
>> +	unsigned int q;
>> +	u32 ctrl;
>> +
>> +	/* Disable software Tx, disable HW Tx/Rx and disable NAPI. */
>> +
>> +	netif_tx_disable(bp->netdev);
>
> AFAIR netif_tx_disable() just stops all the queues, if the NAPIs and
> whatever else may wake queues is still running the queues may get
> restarted right away.

Your memory appears correct (unsurprisingly). The ordering was wrong; it
must be (1) NAPI disabling followed by (2) disabling of Tx queues.

The tx queue wakeup is possible in NAPI poll function through this call
stack: netif_wake_subqueue() <- macb_tx_complete() <- macb_tx_poll().

There is also macb_tx_error_task(), which disables Tx queues at the start
and re-enables them at the end. This means we need to disable Tx queues
after we have disabled queue->tx_error_task. (Note that tx_error_task
probably races with NAPI, but that is outside our scope.)

Thanks,

--
Théo Lebrun, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


^ permalink raw reply

* Re: [PATCH net-next v8 4/4] tun/tap & vhost-net: avoid ptr_ring tail-drop when a qdisc is present
From: Simon Schippers @ 2026-04-16  8:54 UTC (permalink / raw)
  To: Jason Wang, Michael S. Tsirkin
  Cc: willemdebruijn.kernel, andrew+netdev, davem, edumazet, kuba,
	pabeni, mst, eperezma, leiyang, stephen, jon, tim.gebauer, netdev,
	linux-kernel, kvm, virtualization
In-Reply-To: <b9d84d88-46d5-4fd3-a5b2-d914f54766f6@tu-dortmund.de>

To summarize the discussion from my POV:

Open point: __ptr_ring_zero_tail() is only called after
            consuming ring.batch elements.
1) Consumer wakes up the producer but the slot is not cleaned.
--> I disagree, the consumer only wakes after consuming ring.size/2.
    Then __ptr_ring_zero_tail() was called at least once.
2) Producer is woken up but sees the ring is full, so it needs to
   drop the packet.
--> I disagree, because then NETDEV_TX_BUSY is returned. This is
    noticeable as qdisc requeue and only happens very rarely.

Points I will address:
- Minor nit on patch 2 by MST.
- Rebase patch 3 because of commit d748047
  ("ptr_ring: disable KCSAN warnings").
- Document the pair of the smp_mb__after_atomic() in tun_net_xmit
  with tun_ring_consume().
- Use 1 ptr_ring spinlock instead of 2 (currently used for consume
  and empty check); not sure yet how to implement this cleanly.
- Run pktgen benchmarks with pg_set SHARED.


^ permalink raw reply

* [PATCH bpf-next v4 1/6] bpf: name the enum for BPF_FUNC_skb_adjust_room flags
From: Nick Hudson @ 2026-04-16  7:55 UTC (permalink / raw)
  To: bpf, netdev, Willem de Bruijn, Martin KaFai Lau
  Cc: Nick Hudson, Max Tottenham, Anna Glasgall, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman,
	Kumar Kartikeya Dwivedi, linux-kernel
In-Reply-To: <20260416075514.927101-1-nhudson@akamai.com>

Give the existing anonymous enum for BPF_FUNC_skb_adjust_room flags
the name enum bpf_adj_room_flags to enable CO-RE (Compile Once -
Run Everywhere) lookups in BPF programs.

Co-developed-by: Max Tottenham <mtottenh@akamai.com>
Signed-off-by: Max Tottenham <mtottenh@akamai.com>
Co-developed-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Anna Glasgall <aglasgal@akamai.com>
Signed-off-by: Nick Hudson <nhudson@akamai.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
---
 include/uapi/linux/bpf.h       | 2 +-
 tools/include/uapi/linux/bpf.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 552bc5d9afbd..c021ed8d7b44 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6211,7 +6211,7 @@ enum {
 };
 
 /* BPF_FUNC_skb_adjust_room flags. */
-enum {
+enum bpf_adj_room_flags {
 	BPF_F_ADJ_ROOM_FIXED_GSO	= (1ULL << 0),
 	BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	= (1ULL << 1),
 	BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	= (1ULL << 2),
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 677be9a47347..ca35ed622ed5 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6211,7 +6211,7 @@ enum {
 };
 
 /* BPF_FUNC_skb_adjust_room flags. */
-enum {
+enum bpf_adj_room_flags {
 	BPF_F_ADJ_ROOM_FIXED_GSO	= (1ULL << 0),
 	BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	= (1ULL << 1),
 	BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	= (1ULL << 2),
-- 
2.34.1


^ permalink raw reply related

* RE: [Intel-wired-lan] [PATCH iwl-next v2 1/3] igc: remove unused autoneg_failed field
From: Loktionov, Aleksandr @ 2026-04-16  9:04 UTC (permalink / raw)
  To: KhaiWenTan, Nguyen, Anthony L, Kitszel, Przemyslaw,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com
  Cc: intel-wired-lan@lists.osuosl.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, Abdul Rahim, Faizal, Looi, Hong Aun,
	Tan, Khai Wen, Faizal Rahim, Looi, Alan Chia Wei
In-Reply-To: <20260416015520.6090-2-khai.wen.tan@linux.intel.com>



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> Of KhaiWenTan
> Sent: Thursday, April 16, 2026 3:55 AM
> To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel,
> Przemyslaw <przemyslaw.kitszel@intel.com>; andrew+netdev@lunn.ch;
> davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> pabeni@redhat.com
> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; Abdul Rahim, Faizal
> <faizal.abdul.rahim@intel.com>; Looi, Hong Aun
> <hong.aun.looi@intel.com>; Tan, Khai Wen <khai.wen.tan@intel.com>;
> Faizal Rahim <faizal.abdul.rahim@linux.intel.com>; Looi; KhaiWenTan
> <khai.wen.tan@linux.intel.com>
> Subject: [Intel-wired-lan] [PATCH iwl-next v2 1/3] igc: remove unused
> autoneg_failed field
> 
> From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
> 
> autoneg_failed in struct igc_mac_info is never set in the igc driver.
> Remove the field and the dead code checking it in
> igc_config_fc_after_link_up().
> 
> Reviewed-by: Looi, Hong Aun <hong.aun.looi@intel.com>
> Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
> Signed-off-by: KhaiWenTan <khai.wen.tan@linux.intel.com>
> ---
>  drivers/net/ethernet/intel/igc/igc_hw.h  |  1 -
> drivers/net/ethernet/intel/igc/igc_mac.c | 16 +---------------
>  2 files changed, 1 insertion(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h
> b/drivers/net/ethernet/intel/igc/igc_hw.h
> index be8a49a86d09..86ab8f566f44 100644
> --- a/drivers/net/ethernet/intel/igc/igc_hw.h
> +++ b/drivers/net/ethernet/intel/igc/igc_hw.h
> @@ -92,7 +92,6 @@ struct igc_mac_info {
>  	bool asf_firmware_present;
>  	bool arc_subsystem_valid;
> 
> -	bool autoneg_failed;
>  	bool get_link_status;
>  };
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c
> b/drivers/net/ethernet/intel/igc/igc_mac.c
> index 7ac6637f8db7..142beb9ae557 100644
> --- a/drivers/net/ethernet/intel/igc/igc_mac.c
> +++ b/drivers/net/ethernet/intel/igc/igc_mac.c
> @@ -438,28 +438,14 @@ void igc_config_collision_dist(struct igc_hw
> *hw)
>   * Checks the status of auto-negotiation after link up to ensure that
> the
>   * speed and duplex were not forced.  If the link needed to be
> forced, then
>   * flow control needs to be forced also.  If auto-negotiation is
> enabled
> - * and did not fail, then we configure flow control based on our link
> - * partner.
> + * then we configure flow control based on our link partner.
>   */
>  s32 igc_config_fc_after_link_up(struct igc_hw *hw)  {
>  	u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
> -	struct igc_mac_info *mac = &hw->mac;
>  	u16 speed, duplex;
>  	s32 ret_val = 0;
> 
> -	/* Check for the case where we have fiber media and auto-neg
> failed
> -	 * so we had to force link.  In this case, we need to force the
> -	 * configuration of the MAC to match the "fc" parameter.
> -	 */
> -	if (mac->autoneg_failed)
> -		ret_val = igc_force_mac_fc(hw);
> -
> -	if (ret_val) {
> -		hw_dbg("Error forcing flow control settings\n");
> -		goto out;
> -	}
> -
>  	/* In auto-neg, we need to check and see if Auto-Neg has
> completed,
>  	 * and if so, how the PHY and link partner has flow control
>  	 * configured.
> --
> 2.43.0

Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>

^ permalink raw reply

* RE: [Intel-wired-lan] [PATCH iwl-next v2 2/3] igc: move autoneg-enabled settings into igc_handle_autoneg_enabled()
From: Loktionov, Aleksandr @ 2026-04-16  9:05 UTC (permalink / raw)
  To: KhaiWenTan, Nguyen, Anthony L, Kitszel, Przemyslaw,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com
  Cc: intel-wired-lan@lists.osuosl.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, Abdul Rahim, Faizal, Looi, Hong Aun,
	Tan, Khai Wen, Faizal Rahim, Looi, Alan Chia Wei
In-Reply-To: <20260416015520.6090-3-khai.wen.tan@linux.intel.com>



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> Of KhaiWenTan
> Sent: Thursday, April 16, 2026 3:55 AM
> To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel,
> Przemyslaw <przemyslaw.kitszel@intel.com>; andrew+netdev@lunn.ch;
> davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> pabeni@redhat.com
> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; Abdul Rahim, Faizal
> <faizal.abdul.rahim@intel.com>; Looi, Hong Aun
> <hong.aun.looi@intel.com>; Tan, Khai Wen <khai.wen.tan@intel.com>;
> Faizal Rahim <faizal.abdul.rahim@linux.intel.com>; Looi; KhaiWenTan
> <khai.wen.tan@linux.intel.com>
> Subject: [Intel-wired-lan] [PATCH iwl-next v2 2/3] igc: move autoneg-
> enabled settings into igc_handle_autoneg_enabled()
> 
> From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
> 
> Move the advertised link modes and flow control configuration from
> igc_ethtool_set_link_ksettings() into igc_handle_autoneg_enabled().
> 
> No functional change.
> 
> Reviewed-by: Looi, Hong Aun <hong.aun.looi@intel.com>
> Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
> Signed-off-by: KhaiWenTan <khai.wen.tan@linux.intel.com>
> ---
>  drivers/net/ethernet/intel/igc/igc_ethtool.c | 72 ++++++++++++-------
> -
>  1 file changed, 44 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c
> b/drivers/net/ethernet/intel/igc/igc_ethtool.c
> index 0122009bedd0..cfcbf2fdad6e 100644
> --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
> +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
> @@ -2000,6 +2000,49 @@ static int
> igc_ethtool_get_link_ksettings(struct net_device *netdev,
>  	return 0;
>  }
> 
> +/**
> + * igc_handle_autoneg_enabled - Configure autonegotiation
> advertisement
> + * @adapter: private driver structure
> + * @cmd: ethtool link ksettings from user
> + *
> + * Records advertised speeds and flow control settings when autoneg
> + * is enabled.
> + */
> +static void igc_handle_autoneg_enabled(struct igc_adapter *adapter,
> +				       const struct ethtool_link_ksettings
> *cmd) {
> +	struct igc_hw *hw = &adapter->hw;
> +	u16 advertised = 0;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  2500baseT_Full))
> +		advertised |= ADVERTISE_2500_FULL;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  1000baseT_Full))
> +		advertised |= ADVERTISE_1000_FULL;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  100baseT_Full))
> +		advertised |= ADVERTISE_100_FULL;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  100baseT_Half))
> +		advertised |= ADVERTISE_100_HALF;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  10baseT_Full))
> +		advertised |= ADVERTISE_10_FULL;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  10baseT_Half))
> +		advertised |= ADVERTISE_10_HALF;
> +
> +	hw->phy.autoneg_advertised = advertised;
> +	if (adapter->fc_autoneg)
> +		hw->fc.requested_mode = igc_fc_default; }
> +
>  static int
>  igc_ethtool_set_link_ksettings(struct net_device *netdev,
>  			       const struct ethtool_link_ksettings *cmd)
> @@ -2007,7 +2050,6 @@ igc_ethtool_set_link_ksettings(struct net_device
> *netdev,
>  	struct igc_adapter *adapter = netdev_priv(netdev);
>  	struct net_device *dev = adapter->netdev;
>  	struct igc_hw *hw = &adapter->hw;
> -	u16 advertised = 0;
> 
>  	/* When adapter in resetting mode, autoneg/speed/duplex
>  	 * cannot be changed
> @@ -2032,34 +2074,8 @@ igc_ethtool_set_link_ksettings(struct
> net_device *netdev,
>  	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
>  		usleep_range(1000, 2000);
> 
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  2500baseT_Full))
> -		advertised |= ADVERTISE_2500_FULL;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  1000baseT_Full))
> -		advertised |= ADVERTISE_1000_FULL;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  100baseT_Full))
> -		advertised |= ADVERTISE_100_FULL;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  100baseT_Half))
> -		advertised |= ADVERTISE_100_HALF;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  10baseT_Full))
> -		advertised |= ADVERTISE_10_FULL;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  10baseT_Half))
> -		advertised |= ADVERTISE_10_HALF;
> -
>  	if (cmd->base.autoneg == AUTONEG_ENABLE) {
> -		hw->phy.autoneg_advertised = advertised;
> -		if (adapter->fc_autoneg)
> -			hw->fc.requested_mode = igc_fc_default;
> +		igc_handle_autoneg_enabled(adapter, cmd);
>  	} else {
>  		netdev_info(dev, "Force mode currently not
> supported\n");
>  	}
> --
> 2.43.0


Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox