Netdev List
 help / color / mirror / Atom feed
* [PATCH] net: 3com: 3c59x: Use dev_get_drvdata
From: Chuhong Yuan @ 2019-07-23 13:18 UTC (permalink / raw)
  Cc: Steffen Klassert, David S . Miller, netdev, linux-kernel,
	Chuhong Yuan

Instead of using to_pci_dev + pci_get_drvdata,
use dev_get_drvdata to make the code simpler.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
---
 drivers/net/ethernet/3com/3c59x.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 147051404194..8f897828869f 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -847,8 +847,7 @@ static void poll_vortex(struct net_device *dev)
 
 static int vortex_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *ndev = pci_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 
 	if (!ndev || !netif_running(ndev))
 		return 0;
@@ -861,8 +860,7 @@ static int vortex_suspend(struct device *dev)
 
 static int vortex_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *ndev = pci_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 	int err;
 
 	if (!ndev || !netif_running(ndev))
-- 
2.20.1


^ permalink raw reply related

* [PATCH] net: atheros: Use dev_get_drvdata
From: Chuhong Yuan @ 2019-07-23 13:18 UTC (permalink / raw)
  Cc: Jay Cliburn, Chris Snook, David S . Miller, netdev, linux-kernel,
	Chuhong Yuan

Instead of using to_pci_dev + pci_get_drvdata,
use dev_get_drvdata to make the code simpler.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
---
 drivers/net/ethernet/atheros/alx/main.c         | 6 ++----
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 8 +++-----
 drivers/net/ethernet/atheros/atlx/atl1.c        | 8 +++-----
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index e3538ba7d0e7..af41a41c27f0 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1879,8 +1879,7 @@ static void alx_remove(struct pci_dev *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int alx_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct alx_priv *alx = pci_get_drvdata(pdev);
+	struct alx_priv *alx = dev_get_drvdata(dev);
 
 	if (!netif_running(alx->dev))
 		return 0;
@@ -1891,8 +1890,7 @@ static int alx_suspend(struct device *dev)
 
 static int alx_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct alx_priv *alx = pci_get_drvdata(pdev);
+	struct alx_priv *alx = dev_get_drvdata(dev);
 	struct alx_hw *hw = &alx->hw;
 	int err;
 
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index be7f9cebb675..2fd6bf6cb8f7 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2422,8 +2422,7 @@ static int atl1c_close(struct net_device *netdev)
 
 static int atl1c_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct atl1c_hw *hw = &adapter->hw;
 	u32 wufc = adapter->wol;
@@ -2437,7 +2436,7 @@ static int atl1c_suspend(struct device *dev)
 
 	if (wufc)
 		if (atl1c_phy_to_ps_link(hw) != 0)
-			dev_dbg(&pdev->dev, "phy power saving failed");
+			dev_dbg(dev, "phy power saving failed");
 
 	atl1c_power_saving(hw, wufc);
 
@@ -2447,8 +2446,7 @@ static int atl1c_suspend(struct device *dev)
 #ifdef CONFIG_PM_SLEEP
 static int atl1c_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 
 	AT_WRITE_REG(&adapter->hw, REG_WOL_CTRL, 0);
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index b5c6dc914720..8b9df5f8795b 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -2754,8 +2754,7 @@ static int atl1_close(struct net_device *netdev)
 #ifdef CONFIG_PM_SLEEP
 static int atl1_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct atl1_adapter *adapter = netdev_priv(netdev);
 	struct atl1_hw *hw = &adapter->hw;
 	u32 ctrl = 0;
@@ -2780,7 +2779,7 @@ static int atl1_suspend(struct device *dev)
 		val = atl1_get_speed_and_duplex(hw, &speed, &duplex);
 		if (val) {
 			if (netif_msg_ifdown(adapter))
-				dev_printk(KERN_DEBUG, &pdev->dev,
+				dev_printk(KERN_DEBUG, dev,
 					"error getting speed/duplex\n");
 			goto disable_wol;
 		}
@@ -2837,8 +2836,7 @@ static int atl1_suspend(struct device *dev)
 
 static int atl1_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct atl1_adapter *adapter = netdev_priv(netdev);
 
 	iowrite32(0, adapter->hw.hw_addr + REG_WOL_CTRL);
-- 
2.20.1


^ permalink raw reply related

* [PATCH] net: broadcom: Use dev_get_drvdata
From: Chuhong Yuan @ 2019-07-23 13:19 UTC (permalink / raw)
  Cc: Rasesh Mody, GR-Linux-NIC-Dev, David S . Miller, Michael Chan,
	Siva Reddy Kallam, Prashant Sreedharan, netdev, linux-kernel,
	Chuhong Yuan

Instead of using to_pci_dev + pci_get_drvdata,
use dev_get_drvdata to make the code simpler.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
---
 drivers/net/ethernet/broadcom/bnx2.c      | 6 ++----
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++----
 drivers/net/ethernet/broadcom/tg3.c       | 6 ++----
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index dfdd14eadd57..fbc196b480b6 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8673,8 +8673,7 @@ bnx2_remove_one(struct pci_dev *pdev)
 static int
 bnx2_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnx2 *bp = netdev_priv(dev);
 
 	if (netif_running(dev)) {
@@ -8693,8 +8692,7 @@ bnx2_suspend(struct device *device)
 static int
 bnx2_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnx2 *bp = netdev_priv(dev);
 
 	if (!netif_running(dev))
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 7134d2c3eb1c..1aad59b8a413 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10920,8 +10920,7 @@ static void bnxt_shutdown(struct pci_dev *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int bnxt_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnxt *bp = netdev_priv(dev);
 	int rc = 0;
 
@@ -10937,8 +10936,7 @@ static int bnxt_suspend(struct device *device)
 
 static int bnxt_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnxt *bp = netdev_priv(dev);
 	int rc = 0;
 
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 4c404d2213f9..77f3511b97de 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -18041,8 +18041,7 @@ static void tg3_remove_one(struct pci_dev *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int tg3_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct tg3 *tp = netdev_priv(dev);
 	int err = 0;
 
@@ -18098,8 +18097,7 @@ static int tg3_suspend(struct device *device)
 
 static int tg3_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct tg3 *tp = netdev_priv(dev);
 	int err = 0;
 
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH 5/6] vhost: mark dirty pages during map uninit
From: Jason Wang @ 2019-07-23 13:19 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, virtualization, netdev, linux-kernel
In-Reply-To: <20190723041702-mutt-send-email-mst@kernel.org>


On 2019/7/23 下午5:17, Michael S. Tsirkin wrote:
> On Tue, Jul 23, 2019 at 03:57:17AM -0400, Jason Wang wrote:
>> We don't mark dirty pages if the map was teared down outside MMU
>> notifier. This will lead untracked dirty pages. Fixing by marking
>> dirty pages during map uninit.
>>
>> Reported-by: Michael S. Tsirkin<mst@redhat.com>
>> Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
>> Signed-off-by: Jason Wang<jasowang@redhat.com>
>> ---
>>   drivers/vhost/vhost.c | 22 ++++++++++++++++------
>>   1 file changed, 16 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 89c9f08b5146..5b8821d00fe4 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -306,6 +306,18 @@ static void vhost_map_unprefetch(struct vhost_map *map)
>>   	kfree(map);
>>   }
>>   
>> +static void vhost_set_map_dirty(struct vhost_virtqueue *vq,
>> +				struct vhost_map *map, int index)
>> +{
>> +	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
>> +	int i;
>> +
>> +	if (uaddr->write) {
>> +		for (i = 0; i < map->npages; i++)
>> +			set_page_dirty(map->pages[i]);
>> +	}
>> +}
>> +
>>   static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
>>   {
>>   	struct vhost_map *map[VHOST_NUM_ADDRS];
>> @@ -315,8 +327,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
>>   	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
>>   		map[i] = rcu_dereference_protected(vq->maps[i],
>>   				  lockdep_is_held(&vq->mmu_lock));
>> -		if (map[i])
>> +		if (map[i]) {
>> +			vhost_set_map_dirty(vq, map[i], i);
>>   			rcu_assign_pointer(vq->maps[i], NULL);
>> +		}
>>   	}
>>   	spin_unlock(&vq->mmu_lock);
>>   
>> @@ -354,7 +368,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
>>   {
>>   	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
>>   	struct vhost_map *map;
>> -	int i;
>>   
>>   	if (!vhost_map_range_overlap(uaddr, start, end))
>>   		return;
>> @@ -365,10 +378,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
>>   	map = rcu_dereference_protected(vq->maps[index],
>>   					lockdep_is_held(&vq->mmu_lock));
>>   	if (map) {
>> -		if (uaddr->write) {
>> -			for (i = 0; i < map->npages; i++)
>> -				set_page_dirty(map->pages[i]);
>> -		}
>> +		vhost_set_map_dirty(vq, map, index);
>>   		rcu_assign_pointer(vq->maps[index], NULL);
>>   	}
>>   	spin_unlock(&vq->mmu_lock);
> OK and the reason it's safe is because the invalidate counter
> got incremented so we know page will not get mapped again.
>
> But we*do*  need to wait for page not to be mapped.
> And if that means waiting for VQ processing to finish,
> then I worry that is a very log time.
>

I'm not sure I get you here. If we don't have such a map, we will fall
back to the normal uaccess helpers. And in the memory accessor, the RCU
critical section is pretty small.

Thanks




^ permalink raw reply

* Re: [PATCH net-next 3/3] net: stmmac: Introducing support for Page Pool
From: Robin Murphy @ 2019-07-23 13:19 UTC (permalink / raw)
  To: Jon Hunter, Jose Abreu, Lars Persson, Ilias Apalodimas
  Cc: Joao Pinto, Alexandre Torgue, Maxime Ripard,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-stm32@st-md-mailman.stormreply.com, Chen-Yu Tsai,
	Maxime Coquelin, linux-tegra, Giuseppe Cavallaro,
	David S . Miller, linux-arm-kernel@lists.infradead.org
In-Reply-To: <8756d681-e167-fe4a-c6f0-47ae2dcbb100@nvidia.com>

On 23/07/2019 13:09, Jon Hunter wrote:
> 
> On 23/07/2019 11:29, Robin Murphy wrote:
>> On 23/07/2019 11:07, Jose Abreu wrote:
>>> From: Jon Hunter <jonathanh@nvidia.com>
>>> Date: Jul/23/2019, 11:01:24 (UTC+00:00)
>>>
>>>> This appears to be a winner and by disabling the SMMU for the ethernet
>>>> controller and reverting commit 954a03be033c7cef80ddc232e7cbdb17df735663
>>>> this worked! So yes appears to be related to the SMMU being enabled. We
>>>> had to enable the SMMU for ethernet recently due to commit
>>>> 954a03be033c7cef80ddc232e7cbdb17df735663.
>>>
>>> Finally :)
>>>
>>> However, from "git show 954a03be033c7cef80ddc232e7cbdb17df735663":
>>>
>>> +         There are few reasons to allow unmatched stream bypass, and
>>> +         even fewer good ones.  If saying YES here breaks your board
>>> +         you should work on fixing your board.
>>>
>>> So, how can we fix this ? Is your ethernet DT node marked as
>>> "dma-coherent;" ?
>>
>> The first thing to try would be booting the failing setup with
>> "iommu.passthrough=1" (or using CONFIG_IOMMU_DEFAULT_PASSTHROUGH) - if
>> that makes things seem OK, then the problem is likely related to address
>> translation; if not, then it's probably time to start looking at nasties
>> like coherency and ordering, although in principle I wouldn't expect the
>> SMMU to have too much impact there.
> 
> Setting "iommu.passthrough=1" works for me. However, I am not sure where
> to go from here, so any ideas you have would be great.

OK, so that really implies it's something to do with the addresses. From 
a quick skim of the patch, I'm wondering if it's possible for buf->addr 
and buf->page->dma_addr to get out-of-sync at any point. The nature of 
the IOVA allocator makes it quite likely that a stale DMA address will 
have been reused for a new mapping, so putting the wrong address in a 
descriptor may well mean the DMA still ends up hitting a valid 
translation, but which is now pointing to a different page.

>> Do you know if the SMMU interrupts are working correctly? If not, it's
>> possible that an incorrect address or mapping direction could lead to
>> the DMA transaction just being silently terminated without any fault
>> indication, which generally presents as inexplicable weirdness (I've
>> certainly seen that on another platform with the mix of an unsupported
>> interrupt controller and an 'imperfect' ethernet driver).
> 
> If I simply remove the iommu node for the ethernet controller, then I
> see lots of ...
> 
> [    6.296121] arm-smmu 12000000.iommu: Unexpected global fault, this could be serious
> [    6.296125] arm-smmu 12000000.iommu:         GFSR 0x00000002, GFSYNR0 0x00000000, GFSYNR1 0x00000014, GFSYNR2 0x00000000
> 
> So I assume that this is triggering the SMMU interrupt correctly.

According to tegra186.dtsi it appears you're using the MMU-500 combined 
interrupt, so if global faults are being delivered then context faults 
*should* also, but I'd be inclined to try a quick hack of the relevant 
stmmac_desc_ops::set_addr callback to write some bogus unmapped address 
just to make sure arm_smmu_context_fault() then screams as expected, and 
we're not missing anything else.

Robin.

^ permalink raw reply

* Re: [PATCH 4/6] vhost: reset invalidate_count in vhost_set_vring_num_addr()
From: Jason Wang @ 2019-07-23 13:25 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, virtualization, netdev, linux-kernel
In-Reply-To: <20190723042143-mutt-send-email-mst@kernel.org>


On 2019/7/23 下午5:17, Michael S. Tsirkin wrote:
> On Tue, Jul 23, 2019 at 03:57:16AM -0400, Jason Wang wrote:
>> The vhost_set_vring_num_addr() could be called in the middle of
>> invalidate_range_start() and invalidate_range_end(). If we don't reset
>> invalidate_count after the un-registering of MMU notifier, the
>> invalidate_cont will run out of sync (e.g never reach zero). This will
>> in fact disable the fast accessor path. Fixing by reset the count to
>> zero.
>>
>> Reported-by: Michael S. Tsirkin <mst@redhat.com>
>> Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>   drivers/vhost/vhost.c | 4 ++++
>>   1 file changed, 4 insertions(+)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 03666b702498..89c9f08b5146 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -2074,6 +2074,10 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
>>   		d->has_notifier = false;
>>   	}
>>   
>> +	/* reset invalidate_count in case we are in the middle of
>> +	 * invalidate_start() and invalidate_end().
>> +	 */
>> +	vq->invalidate_count = 0;
> I think that the code is ok but the comments are not very clear:
> - we are never in the middle since we just removed the notifier


If I read the code correctly, mmu_notifier_unregister() can only 
guarantee to wait for the pending method to complete. So we can have:

invalidate_start()

mmu_notifier_unregister()

invalidate_end()


> - the result is not just disabling optimization:
>    if notifier becomes negative, then later we
>    can think it's ok to map when it isn't since
>    notifier is active.


I don't get how it could be negative; the only possible outcome is a
positive value.

Thanks


>
>>   	vhost_uninit_vq_maps(vq);
>>   #endif
>>   
>> -- 
>> 2.18.1

^ permalink raw reply

* Re: [PATCH 2/6] vhost: validate MMU notifier registration
From: Jason Wang @ 2019-07-23 13:30 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, virtualization, netdev, linux-kernel
In-Reply-To: <20190723042428-mutt-send-email-mst@kernel.org>


On 2019/7/23 下午5:17, Michael S. Tsirkin wrote:
> On Tue, Jul 23, 2019 at 03:57:14AM -0400, Jason Wang wrote:
>> The return value of mmu_notifier_register() is not checked in
>> vhost_vring_set_num_addr(). This will cause an out of sync between mm
>> and MMU notifier thus a double free. To solve this, introduce a
>> boolean flag to track whether MMU notifier is registered and only do
>> unregistering when it was true.
>>
>> Reported-and-tested-by:
>> syzbot+e58112d71f77113ddb7b@syzkaller.appspotmail.com
>> Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> Right. This fixes the bug.
> But it's not great that simple things like
> setting vq address put pressure on memory allocator.
> Also, if we get a single during processing
> notifier register will fail, disabling optimization permanently.


Yes, but do we really care about this case? E.g. we even fail with
-ENOMEM for set owner.


>
> In fact, see below:
>
>
>> ---
>>   drivers/vhost/vhost.c | 19 +++++++++++++++----
>>   drivers/vhost/vhost.h |  1 +
>>   2 files changed, 16 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 34c0d970bcbc..058191d5efad 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -630,6 +630,7 @@ void vhost_dev_init(struct vhost_dev *dev,
>>   	dev->iov_limit = iov_limit;
>>   	dev->weight = weight;
>>   	dev->byte_weight = byte_weight;
>> +	dev->has_notifier = false;
>>   	init_llist_head(&dev->work_list);
>>   	init_waitqueue_head(&dev->wait);
>>   	INIT_LIST_HEAD(&dev->read_list);
>> @@ -731,6 +732,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>>   	if (err)
>>   		goto err_mmu_notifier;
>>   #endif
>> +	dev->has_notifier = true;
>>   
>>   	return 0;
>>   
> I just noticed that set owner now fails if we get a signal.
> Userspace could retry in theory but it does not:
> this is userspace abi breakage since it used to only
> fail on invalid input.


Well, at least kthread_create() and vhost_dev_alloc_iovecs() will 
allocate memory.

Thanks


>
>> @@ -960,7 +962,11 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
>>   	}
>>   	if (dev->mm) {
>>   #if VHOST_ARCH_CAN_ACCEL_UACCESS
>> -		mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
>> +		if (dev->has_notifier) {
>> +			mmu_notifier_unregister(&dev->mmu_notifier,
>> +						dev->mm);
>> +			dev->has_notifier = false;
>> +		}
>>   #endif
>>   		mmput(dev->mm);
>>   	}
>> @@ -2065,8 +2071,10 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
>>   	/* Unregister MMU notifer to allow invalidation callback
>>   	 * can access vq->uaddrs[] without holding a lock.
>>   	 */
>> -	if (d->mm)
>> +	if (d->has_notifier) {
>>   		mmu_notifier_unregister(&d->mmu_notifier, d->mm);
>> +		d->has_notifier = false;
>> +	}
>>   
>>   	vhost_uninit_vq_maps(vq);
>>   #endif
>> @@ -2086,8 +2094,11 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
>>   	if (r == 0)
>>   		vhost_setup_vq_uaddr(vq);
>>   
>> -	if (d->mm)
>> -		mmu_notifier_register(&d->mmu_notifier, d->mm);
>> +	if (d->mm) {
>> +		r = mmu_notifier_register(&d->mmu_notifier, d->mm);
>> +		if (!r)
>> +			d->has_notifier = true;
>> +	}
>>   #endif
>>   
>>   	mutex_unlock(&vq->mutex);
>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> index 819296332913..a62f56a4cf72 100644
>> --- a/drivers/vhost/vhost.h
>> +++ b/drivers/vhost/vhost.h
>> @@ -214,6 +214,7 @@ struct vhost_dev {
>>   	int iov_limit;
>>   	int weight;
>>   	int byte_weight;
>> +	bool has_notifier;
>>   };
>>   
>>   bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
>> -- 
>> 2.18.1

^ permalink raw reply

* Re: [PATCH net-next 3/3] net: stmmac: Introducing support for Page Pool
From: Jon Hunter @ 2019-07-23 13:34 UTC (permalink / raw)
  To: Jose Abreu, Lars Persson, Ilias Apalodimas
  Cc: Joao Pinto, Alexandre Torgue, Maxime Ripard,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-stm32@st-md-mailman.stormreply.com, Chen-Yu Tsai,
	Maxime Coquelin, linux-tegra, Giuseppe Cavallaro,
	David S . Miller, linux-arm-kernel@lists.infradead.org
In-Reply-To: <BYAPR12MB3269EC45ABAF8F279288B003D3C70@BYAPR12MB3269.namprd12.prod.outlook.com>


On 23/07/2019 13:51, Jose Abreu wrote:
> From: Jon Hunter <jonathanh@nvidia.com>
> Date: Jul/23/2019, 12:58:55 (UTC+00:00)
> 
>>
>> On 23/07/2019 11:49, Jose Abreu wrote:
>>> From: Jon Hunter <jonathanh@nvidia.com>
>>> Date: Jul/23/2019, 11:38:33 (UTC+00:00)
>>>
>>>>
>>>> On 23/07/2019 11:07, Jose Abreu wrote:
>>>>> From: Jon Hunter <jonathanh@nvidia.com>
>>>>> Date: Jul/23/2019, 11:01:24 (UTC+00:00)
>>>>>
>>>>>> This appears to be a winner and by disabling the SMMU for the ethernet
>>>>>> controller and reverting commit 954a03be033c7cef80ddc232e7cbdb17df735663
>>>>>> this worked! So yes appears to be related to the SMMU being enabled. We
>>>>>> had to enable the SMMU for ethernet recently due to commit
>>>>>> 954a03be033c7cef80ddc232e7cbdb17df735663.
>>>>>
>>>>> Finally :)
>>>>>
>>>>> However, from "git show 954a03be033c7cef80ddc232e7cbdb17df735663":
>>>>>
>>>>> +         There are few reasons to allow unmatched stream bypass, and
>>>>> +         even fewer good ones.  If saying YES here breaks your board
>>>>> +         you should work on fixing your board.
>>>>>
>>>>> So, how can we fix this ? Is your ethernet DT node marked as 
>>>>> "dma-coherent;" ?
>>>>
>>>> TBH I have no idea. I can't say I fully understand your change or how it
>>>> is breaking things for us.
>>>>
>>>> Currently, the Tegra DT binding does not have 'dma-coherent' set. I see
>>>> this is optional, but I am not sure how you determine whether or not
>>>> this should be set.
>>>
>>> From my understanding it means that your device / IP DMA accesses are coherent regarding the CPU point of view. I think it will be the case if GMAC is not behind any kind of IOMMU in the HW arch.
>>
>> I understand what coherency is, I just don't know how you tell if this
>> implementation of the ethernet controller is coherent or not.
> 
> Do you have any detailed diagram of your HW ? Such as blocks / IPs 
> connection, address space wiring , ...

Yes, this can be found in the Tegra X2 Technical Reference Manual [0].
Unfortunately, you need to create an account to download it.

Jon

[0] https://developer.nvidia.com/embedded/dlc/parker-series-trm

-- 
nvpublic

^ permalink raw reply

* Re: [PATCH net-next] net: dsa: mv88e6xxx: avoid some redundant vtu load/purge operations
From: Andrew Lunn @ 2019-07-23 13:40 UTC (permalink / raw)
  To: Rasmus Villemoes
  Cc: Vivien Didelot, Florian Fainelli, David S. Miller,
	Rasmus Villemoes, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20190722233713.31396-1-rasmus.villemoes@prevas.dk>

On Mon, Jul 22, 2019 at 11:37:26PM +0000, Rasmus Villemoes wrote:
> We have an ERPS (Ethernet Ring Protection Switching) setup involving
> mv88e6250 switches which we're in the process of switching to a BSP
> based on the mainline driver. Breaking any link in the ring works as
> expected, with the ring reconfiguring itself quickly and traffic
> continuing with almost no noticable drops. However, when plugging back
> the cable, we see 5+ second stalls.

Hi Rasmus

I would prefer Vivien reviews this patch. But he is away at the
moment. Are you O.K. to wait a few days?

	Andrew

^ permalink raw reply

* [PATCHv2] libertas: add terminating entry to fw_table
From: Oliver Neukum @ 2019-07-23 13:52 UTC (permalink / raw)
  To: davem, netdev; +Cc: Oliver Neukum

In case no firmware was found, the system would happily read
and try to load garbage. Terminate the table properly.

V2: fix style issues

Fixes: ce84bb69f50e6 ("libertas USB: convert to asynchronous firmware loading")
Signed-off-by: Oliver Neukum <oneukum@suse.com>
Reported-by: syzbot+8a8f48672560c8ca59dd@syzkaller.appspotmail.com
---
 drivers/net/wireless/marvell/libertas/if_usb.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c
index afac2481909b..fedafdc8b989 100644
--- a/drivers/net/wireless/marvell/libertas/if_usb.c
+++ b/drivers/net/wireless/marvell/libertas/if_usb.c
@@ -50,7 +50,10 @@ static const struct lbs_fw_table fw_table[] = {
 	{ MODEL_8388, "libertas/usb8388_v5.bin", NULL },
 	{ MODEL_8388, "libertas/usb8388.bin", NULL },
 	{ MODEL_8388, "usb8388.bin", NULL },
-	{ MODEL_8682, "libertas/usb8682.bin", NULL }
+	{ MODEL_8682, "libertas/usb8682.bin", NULL },
+
+	/* terminating entry - keep at end */
+	{ MODEL_8388, NULL, NULL }
 };
 
 static const struct usb_device_id if_usb_table[] = {
-- 
2.16.4


^ permalink raw reply related

* Re: [PATCH v1] tun: mark small packets as owned by the tap sock
From: Jason Wang @ 2019-07-23 13:53 UTC (permalink / raw)
  To: Alexis Bauvin, stephen, davem; +Cc: netdev
In-Reply-To: <20190723130151.36745-1-abauvin@scaleway.com>


On 2019/7/23 下午9:01, Alexis Bauvin wrote:
> Small packets going out of a tap device go through an optimized code
> path that uses build_skb() rather than sock_alloc_send_pskb(). The
> latter calls skb_set_owner_w(), but the small packet code path does not.
>
> The net effect is that small packets are not owned by the userland
> application's socket (e.g. QEMU), while large packets are.
> This can be seen with a TCP session, where packets are not owned when
> the window size is small enough (around PAGE_SIZE), while they are once
> the window grows (note that this requires the host to support virtio
> tso for the guest to offload segmentation).
> All this leads to inconsistent behaviour in the kernel, especially on
> netfilter modules that uses sk->socket (e.g. xt_owner).
>
> Signed-off-by: Alexis Bauvin <abauvin@scaleway.com>
> Fixes: 66ccbc9c87c2 ("tap: use build_skb() for small packet")
> ---
>   drivers/net/tun.c | 71 ++++++++++++++++++++++++-----------------------
>   1 file changed, 37 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 3d443597bd04..ac56b6a29eb2 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1656,6 +1656,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>   {
>   	struct page_frag *alloc_frag = &current->task_frag;
>   	struct bpf_prog *xdp_prog;
> +	struct sk_buff *skb;
>   	int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>   	char *buf;
>   	size_t copied;
> @@ -1686,44 +1687,46 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>   	 */
>   	if (hdr->gso_type || !xdp_prog) {
>   		*skb_xdp = 1;
> -		return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
> -	}
> -
> -	*skb_xdp = 0;
> +	} else {
> +		*skb_xdp = 0;
>   
> -	local_bh_disable();
> -	rcu_read_lock();
> -	xdp_prog = rcu_dereference(tun->xdp_prog);
> -	if (xdp_prog) {
> -		struct xdp_buff xdp;
> -		u32 act;
> -
> -		xdp.data_hard_start = buf;
> -		xdp.data = buf + pad;
> -		xdp_set_data_meta_invalid(&xdp);
> -		xdp.data_end = xdp.data + len;
> -		xdp.rxq = &tfile->xdp_rxq;
> -
> -		act = bpf_prog_run_xdp(xdp_prog, &xdp);
> -		if (act == XDP_REDIRECT || act == XDP_TX) {
> -			get_page(alloc_frag->page);
> -			alloc_frag->offset += buflen;
> +		local_bh_disable();
> +		rcu_read_lock();
> +		xdp_prog = rcu_dereference(tun->xdp_prog);
> +		if (xdp_prog) {
> +			struct xdp_buff xdp;
> +			u32 act;
> +
> +			xdp.data_hard_start = buf;
> +			xdp.data = buf + pad;
> +			xdp_set_data_meta_invalid(&xdp);
> +			xdp.data_end = xdp.data + len;
> +			xdp.rxq = &tfile->xdp_rxq;
> +
> +			act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +			if (act == XDP_REDIRECT || act == XDP_TX) {
> +				get_page(alloc_frag->page);
> +				alloc_frag->offset += buflen;
> +			}
> +			err = tun_xdp_act(tun, xdp_prog, &xdp, act);
> +			if (err < 0)
> +				goto err_xdp;
> +			if (err == XDP_REDIRECT)
> +				xdp_do_flush_map();
> +			if (err != XDP_PASS)
> +				goto out;
> +
> +			pad = xdp.data - xdp.data_hard_start;
> +			len = xdp.data_end - xdp.data;
>   		}
> -		err = tun_xdp_act(tun, xdp_prog, &xdp, act);
> -		if (err < 0)
> -			goto err_xdp;
> -		if (err == XDP_REDIRECT)
> -			xdp_do_flush_map();
> -		if (err != XDP_PASS)
> -			goto out;
> -
> -		pad = xdp.data - xdp.data_hard_start;
> -		len = xdp.data_end - xdp.data;
> +		rcu_read_unlock();
> +		local_bh_enable();
>   	}
> -	rcu_read_unlock();
> -	local_bh_enable();
>   
> -	return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
> +	skb = __tun_build_skb(alloc_frag, buf, buflen, len, pad);
> +	if (skb)
> +		skb_set_owner_w(skb, tfile->socket.sk);
> +	return skb;
>   
>   err_xdp:
>   	put_page(alloc_frag->page);


To reduce the change set, is there any way you can move the
skb_set_owner_w() call into __tun_build_skb()?

Thanks



^ permalink raw reply

* Re: [PATCH v1] tun: mark small packets as owned by the tap sock
From: Alexis Bauvin @ 2019-07-23 13:59 UTC (permalink / raw)
  To: Jason Wang; +Cc: stephen, davem, netdev
In-Reply-To: <359225ef-9bc2-220b-ec93-cf671b705e65@redhat.com>


> Le 23 juil. 2019 à 15:53, Jason Wang <jasowang@redhat.com> a écrit :
> On 2019/7/23 下午9:01, Alexis Bauvin wrote:
>> Small packets going out of a tap device go through an optimized code
>> path that uses build_skb() rather than sock_alloc_send_pskb(). The
>> latter calls skb_set_owner_w(), but the small packet code path does not.
>> 
>> The net effect is that small packets are not owned by the userland
>> application's socket (e.g. QEMU), while large packets are.
>> This can be seen with a TCP session, where packets are not owned when
>> the window size is small enough (around PAGE_SIZE), while they are once
>> the window grows (note that this requires the host to support virtio
>> tso for the guest to offload segmentation).
>> All this leads to inconsistent behaviour in the kernel, especially on
>> netfilter modules that uses sk->socket (e.g. xt_owner).
>> 
>> Signed-off-by: Alexis Bauvin <abauvin@scaleway.com>
>> Fixes: 66ccbc9c87c2 ("tap: use build_skb() for small packet")
>> ---
>>  drivers/net/tun.c | 71 ++++++++++++++++++++++++-----------------------
>>  1 file changed, 37 insertions(+), 34 deletions(-)
>> 
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 3d443597bd04..ac56b6a29eb2 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -1656,6 +1656,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>  {
>>  	struct page_frag *alloc_frag = &current->task_frag;
>>  	struct bpf_prog *xdp_prog;
>> +	struct sk_buff *skb;
>>  	int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>>  	char *buf;
>>  	size_t copied;
>> @@ -1686,44 +1687,46 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
>>  	 */
>>  	if (hdr->gso_type || !xdp_prog) {
>>  		*skb_xdp = 1;
>> -		return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
>> -	}
>> -
>> -	*skb_xdp = 0;
>> +	} else {
>> +		*skb_xdp = 0;
>>  -	local_bh_disable();
>> -	rcu_read_lock();
>> -	xdp_prog = rcu_dereference(tun->xdp_prog);
>> -	if (xdp_prog) {
>> -		struct xdp_buff xdp;
>> -		u32 act;
>> -
>> -		xdp.data_hard_start = buf;
>> -		xdp.data = buf + pad;
>> -		xdp_set_data_meta_invalid(&xdp);
>> -		xdp.data_end = xdp.data + len;
>> -		xdp.rxq = &tfile->xdp_rxq;
>> -
>> -		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>> -		if (act == XDP_REDIRECT || act == XDP_TX) {
>> -			get_page(alloc_frag->page);
>> -			alloc_frag->offset += buflen;
>> +		local_bh_disable();
>> +		rcu_read_lock();
>> +		xdp_prog = rcu_dereference(tun->xdp_prog);
>> +		if (xdp_prog) {
>> +			struct xdp_buff xdp;
>> +			u32 act;
>> +
>> +			xdp.data_hard_start = buf;
>> +			xdp.data = buf + pad;
>> +			xdp_set_data_meta_invalid(&xdp);
>> +			xdp.data_end = xdp.data + len;
>> +			xdp.rxq = &tfile->xdp_rxq;
>> +
>> +			act = bpf_prog_run_xdp(xdp_prog, &xdp);
>> +			if (act == XDP_REDIRECT || act == XDP_TX) {
>> +				get_page(alloc_frag->page);
>> +				alloc_frag->offset += buflen;
>> +			}
>> +			err = tun_xdp_act(tun, xdp_prog, &xdp, act);
>> +			if (err < 0)
>> +				goto err_xdp;
>> +			if (err == XDP_REDIRECT)
>> +				xdp_do_flush_map();
>> +			if (err != XDP_PASS)
>> +				goto out;
>> +
>> +			pad = xdp.data - xdp.data_hard_start;
>> +			len = xdp.data_end - xdp.data;
>>  		}
>> -		err = tun_xdp_act(tun, xdp_prog, &xdp, act);
>> -		if (err < 0)
>> -			goto err_xdp;
>> -		if (err == XDP_REDIRECT)
>> -			xdp_do_flush_map();
>> -		if (err != XDP_PASS)
>> -			goto out;
>> -
>> -		pad = xdp.data - xdp.data_hard_start;
>> -		len = xdp.data_end - xdp.data;
>> +		rcu_read_unlock();
>> +		local_bh_enable();
>>  	}
>> -	rcu_read_unlock();
>> -	local_bh_enable();
>>  -	return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
>> +	skb = __tun_build_skb(alloc_frag, buf, buflen, len, pad);
>> +	if (skb)
>> +		skb_set_owner_w(skb, tfile->socket.sk);
>> +	return skb;
>>    err_xdp:
>>  	put_page(alloc_frag->page);
> 
> 
> To reduce the change set, could you move the skb_set_owner_w() call into __tun_build_skb()?
> 
> Thanks

Sure, doing and sending v2

^ permalink raw reply

* Re: [PATCH net-next] net: dsa: mv88e6xxx: avoid some redundant vtu load/purge operations
From: Rasmus Villemoes @ 2019-07-23 14:09 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Vivien Didelot, Florian Fainelli, David S. Miller,
	Rasmus Villemoes, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20190723134037.GA2381@lunn.ch>

On 23/07/2019 15.40, Andrew Lunn wrote:
> On Mon, Jul 22, 2019 at 11:37:26PM +0000, Rasmus Villemoes wrote:
>> We have an ERPS (Ethernet Ring Protection Switching) setup involving
>> mv88e6250 switches which we're in the process of switching to a BSP
>> based on the mainline driver. Breaking any link in the ring works as
>> expected, with the ring reconfiguring itself quickly and traffic
> >> continuing with almost no noticeable drops. However, when plugging back
>> the cable, we see 5+ second stalls.
> 
> Hi Rasmus
> 
> I would prefer Vivien reviews this patch. But he is away at the
> moment. Are you O.K. to wait a few days?

Sure, no rush.

Rasmus

^ permalink raw reply

* [PATCH] e1000e: Use dev_get_drvdata where possible
From: Chuhong Yuan @ 2019-07-23 14:15 UTC (permalink / raw)
  Cc: Jeff Kirsher, David S . Miller, intel-wired-lan, netdev,
	linux-kernel, Chuhong Yuan

Instead of using to_pci_dev + pci_get_drvdata,
use dev_get_drvdata to make code simpler.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index e4baa13b3cda..fa2755849c54 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6297,7 +6297,7 @@ static void e1000e_flush_lpic(struct pci_dev *pdev)
 
 static int e1000e_pm_freeze(struct device *dev)
 {
-	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 
 	netif_device_detach(netdev);
@@ -6630,7 +6630,7 @@ static int __e1000_resume(struct pci_dev *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int e1000e_pm_thaw(struct device *dev)
 {
-	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 
 	e1000e_set_interrupt_capability(adapter);
@@ -6679,8 +6679,7 @@ static int e1000e_pm_resume(struct device *dev)
 
 static int e1000e_pm_runtime_idle(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	u16 eee_lp;
 
-- 
2.20.1


^ permalink raw reply related

* [PATCH] fm10k: Use dev_get_drvdata
From: Chuhong Yuan @ 2019-07-23 14:15 UTC (permalink / raw)
  Cc: Jeff Kirsher, David S . Miller, intel-wired-lan, netdev,
	linux-kernel, Chuhong Yuan

Instead of using to_pci_dev + pci_get_drvdata,
use dev_get_drvdata to make code simpler.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index e49fb51d3613..7bfc8a5b6f55 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -2352,7 +2352,7 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)
  **/
 static int __maybe_unused fm10k_resume(struct device *dev)
 {
-	struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
+	struct fm10k_intfc *interface = dev_get_drvdata(dev);
 	struct net_device *netdev = interface->netdev;
 	struct fm10k_hw *hw = &interface->hw;
 	int err;
@@ -2379,7 +2379,7 @@ static int __maybe_unused fm10k_resume(struct device *dev)
  **/
 static int __maybe_unused fm10k_suspend(struct device *dev)
 {
-	struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
+	struct fm10k_intfc *interface = dev_get_drvdata(dev);
 	struct net_device *netdev = interface->netdev;
 
 	netif_device_detach(netdev);
-- 
2.20.1


^ permalink raw reply related

* [PATCH] i40e: Use dev_get_drvdata
From: Chuhong Yuan @ 2019-07-23 14:15 UTC (permalink / raw)
  Cc: Jeff Kirsher, David S . Miller, intel-wired-lan, netdev,
	linux-kernel, Chuhong Yuan

Instead of using to_pci_dev + pci_get_drvdata,
use dev_get_drvdata to make code simpler.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 9ebbe3da61bb..44da407e0bf9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -15605,8 +15605,7 @@ static void i40e_shutdown(struct pci_dev *pdev)
  **/
 static int __maybe_unused i40e_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	struct i40e_pf *pf = dev_get_drvdata(dev);
 	struct i40e_hw *hw = &pf->hw;
 
 	/* If we're already suspended, then there is nothing to do */
@@ -15656,8 +15655,7 @@ static int __maybe_unused i40e_suspend(struct device *dev)
  **/
 static int __maybe_unused i40e_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	struct i40e_pf *pf = dev_get_drvdata(dev);
 	int err;
 
 	/* If we're not suspended, then there is nothing to do */
@@ -15674,7 +15672,7 @@ static int __maybe_unused i40e_resume(struct device *dev)
 	 */
 	err = i40e_restore_interrupt_scheme(pf);
 	if (err) {
-		dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n",
+		dev_err(dev, "Cannot restore interrupt scheme: %d\n",
 			err);
 	}
 
-- 
2.20.1


^ permalink raw reply related

* [PATCH] igb: Use dev_get_drvdata where possible
From: Chuhong Yuan @ 2019-07-23 14:16 UTC (permalink / raw)
  Cc: Jeff Kirsher, David S . Miller, intel-wired-lan, netdev,
	linux-kernel, Chuhong Yuan

Instead of using to_pci_dev + pci_get_drvdata,
use dev_get_drvdata to make code simpler.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b4df3e319467..145f58ee0451 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8879,8 +8879,7 @@ static int __maybe_unused igb_resume(struct device *dev)
 
 static int __maybe_unused igb_runtime_idle(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct igb_adapter *adapter = netdev_priv(netdev);
 
 	if (!igb_has_link(adapter))
-- 
2.20.1


^ permalink raw reply related

* Re: [RFC PATCH net-next 10/12] drop_monitor: Add packet alert mode
From: Ido Schimmel @ 2019-07-23 14:16 UTC (permalink / raw)
  To: Neil Horman
  Cc: netdev, davem, dsahern, roopa, nikolay, jakub.kicinski, toke,
	andy, f.fainelli, andrew, vivien.didelot, mlxsw, Ido Schimmel
In-Reply-To: <20190723124340.GA10377@hmswarspite.think-freely.org>

On Tue, Jul 23, 2019 at 08:43:40AM -0400, Neil Horman wrote:
> On Mon, Jul 22, 2019 at 09:31:32PM +0300, Ido Schimmel wrote:
> > +static void net_dm_packet_work(struct work_struct *work)
> > +{
> > +	struct per_cpu_dm_data *data;
> > +	struct sk_buff_head list;
> > +	struct sk_buff *skb;
> > +	unsigned long flags;
> > +
> > +	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
> > +
> > +	__skb_queue_head_init(&list);
> > +
> > +	spin_lock_irqsave(&data->drop_queue.lock, flags);
> > +	skb_queue_splice_tail_init(&data->drop_queue, &list);
> > +	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
> > +
> These functions are all executed in a per-cpu context.  While there's nothing
> wrong with using a spinlock here, I think you can get away with just doing
> local_irq_save and local_irq_restore.

Hi Neil,

Thanks a lot for reviewing. I might be missing something, but please
note that this function is executed from a workqueue and therefore the
CPU it is running on does not have to be the same CPU to which 'data'
belongs. If so, I'm not sure how I can avoid taking the spinlock, as
otherwise two different CPUs can modify the list concurrently.

> 
> Neil
> 
> > +	while ((skb = __skb_dequeue(&list)))
> > +		net_dm_packet_report(skb);
> > +}

^ permalink raw reply

* [PATCH] net: jme: Use dev_get_drvdata
From: Chuhong Yuan @ 2019-07-23 14:16 UTC (permalink / raw)
  Cc: Guo-Fu Tseng, David S . Miller, netdev, linux-kernel,
	Chuhong Yuan

Instead of using to_pci_dev + pci_get_drvdata,
use dev_get_drvdata to make code simpler.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
---
 drivers/net/ethernet/jme.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 0b668357db4d..db7e10e23310 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -3193,8 +3193,7 @@ jme_shutdown(struct pci_dev *pdev)
 static int
 jme_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct jme_adapter *jme = netdev_priv(netdev);
 
 	if (!netif_running(netdev))
@@ -3236,8 +3235,7 @@ jme_suspend(struct device *dev)
 static int
 jme_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct net_device *netdev = dev_get_drvdata(dev);
 	struct jme_adapter *jme = netdev_priv(netdev);
 
 	if (!netif_running(netdev))
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH] net: atheros: Use dev_get_drvdata
From: Joe Perches @ 2019-07-23 14:17 UTC (permalink / raw)
  To: Chuhong Yuan
  Cc: Jay Cliburn, Chris Snook, David S . Miller, netdev, linux-kernel
In-Reply-To: <20190723131856.31932-1-hslester96@gmail.com>

On Tue, 2019-07-23 at 21:18 +0800, Chuhong Yuan wrote:
> Instead of using to_pci_dev + pci_get_drvdata,
> use dev_get_drvdata to make code simpler.

unrelated trivia:

> diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
[]
> @@ -2422,8 +2422,7 @@ static int atl1c_close(struct net_device *netdev)
>  
>  static int atl1c_suspend(struct device *dev)
>  {
> -	struct pci_dev *pdev = to_pci_dev(dev);
> -	struct net_device *netdev = pci_get_drvdata(pdev);
> +	struct net_device *netdev = dev_get_drvdata(dev);
>  	struct atl1c_adapter *adapter = netdev_priv(netdev);
>  	struct atl1c_hw *hw = &adapter->hw;
>  	u32 wufc = adapter->wol;
> @@ -2437,7 +2436,7 @@ static int atl1c_suspend(struct device *dev)
>  
>  	if (wufc)
>  		if (atl1c_phy_to_ps_link(hw) != 0)
> -			dev_dbg(&pdev->dev, "phy power saving failed");
> +			dev_dbg(dev, "phy power saving failed");

These and similar uses could/should use netdev_dbg

			netdev_dbg(netdev, "phy power saving failed\n");

with the terminating newline too

> diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
[]
> @@ -2780,7 +2779,7 @@ static int atl1_suspend(struct device *dev)
>  		val = atl1_get_speed_and_duplex(hw, &speed, &duplex);
>  		if (val) {
>  			if (netif_msg_ifdown(adapter))
> -				dev_printk(KERN_DEBUG, &pdev->dev,
> +				dev_printk(KERN_DEBUG, dev,
>  					"error getting speed/duplex\n");

netdev_printk(KERN_DEBUG, netdev, etc...);



^ permalink raw reply

* [PATCH v2] tun: mark small packets as owned by the tap sock
From: Alexis Bauvin @ 2019-07-23 14:23 UTC (permalink / raw)
  To: stephen, davem, jasowang; +Cc: netdev, abauvin

- v1 -> v2: Move skb_set_owner_w to __tun_build_skb to reduce patch size

Small packets going out of a tap device go through an optimized code
path that uses build_skb() rather than sock_alloc_send_pskb(). The
latter calls skb_set_owner_w(), but the small packet code path does not.

The net effect is that small packets are not owned by the userland
application's socket (e.g. QEMU), while large packets are.
This can be seen with a TCP session, where packets are not owned when
the window size is small enough (around PAGE_SIZE), while they are once
the window grows (note that this requires the host to support virtio
tso for the guest to offload segmentation).
All this leads to inconsistent behaviour in the kernel, especially on
netfilter modules that use sk->socket (e.g. xt_owner).

Signed-off-by: Alexis Bauvin <abauvin@scaleway.com>
Fixes: 66ccbc9c87c2 ("tap: use build_skb() for small packet")
---
 drivers/net/tun.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 3d443597bd04..db16d7a13e00 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1599,7 +1599,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
 	return true;
 }
 
-static struct sk_buff *__tun_build_skb(struct page_frag *alloc_frag, char *buf,
+static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
+				       struct page_frag *alloc_frag, char *buf,
 				       int buflen, int len, int pad)
 {
 	struct sk_buff *skb = build_skb(buf, buflen);
@@ -1609,6 +1610,7 @@ static struct sk_buff *__tun_build_skb(struct page_frag *alloc_frag, char *buf,
 
 	skb_reserve(skb, pad);
 	skb_put(skb, len);
+	skb_set_owner_w(skb, tfile->socket.sk);
 
 	get_page(alloc_frag->page);
 	alloc_frag->offset += buflen;
@@ -1686,7 +1688,8 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	 */
 	if (hdr->gso_type || !xdp_prog) {
 		*skb_xdp = 1;
-		return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
+		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
+				       pad);
 	}
 
 	*skb_xdp = 0;
@@ -1723,7 +1726,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	rcu_read_unlock();
 	local_bh_enable();
 
-	return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
+	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
 
 err_xdp:
 	put_page(alloc_frag->page);
-- 


^ permalink raw reply related

* Re: b53 DSA : vlan tagging broken ?
From: Anand Raj Manickam @ 2019-07-23 14:26 UTC (permalink / raw)
  To: f.fainelli, netdev, andrew
In-Reply-To: <CAEyr1FS-8uBEMBS+7U4K8wBLJgPZD0Lxa4FyzuvYZ0RGhTH8fA@mail.gmail.com>

The issue is resolved by enabling vlan_filtering for the bridge and
changing the phy-mode from "rgmii-txid" to "rgmii" in the dts file.


On Mon, Jul 22, 2019 at 6:57 PM Anand Raj Manickam <anandrm@gmail.com> wrote:
>
> Hi ,
> I had working DSA with 4.9.184 kernel, with BCM53125, rev 4 hardware .
> It had 2 bridges with
> br0            8000.00       no              lan1
>                                                         lan2
>                                                         lan3
>                                                         eth0.101
>
> br1            8000.01     no             eth0.102
>                                                     wan
> # bridge vlan
> port    vlan ids
> wan      102 PVID Egress Untagged
> wan      102 PVID Egress Untagged
> lan3     101 PVID Egress Untagged
> lan3     101 PVID Egress Untagged
> lan2     101 PVID Egress Untagged
> lan2     101 PVID Egress Untagged
> lan1     101 PVID Egress Untagged
> lan1     101 PVID Egress Untagged
> eth0.102  102 PVID
> eth0.102
> br1     1 PVID Egress Untagged
> eth0.101  101 PVID
> eth0.101
> br0     1 PVID Egress Untagged
>
> I upgraded the kernel to 5.2. The behavior is broken. I had to rip out the
> config and check what was broken from the init scripts.
> The bridge vlan commands failed to add, as the newer kernel requires
> the vlan interfaces to be up.
> https://lkml.org/lkml/2018/5/22/887 - I had the same behaviour as in this thread.
> I re-added them manually, so that we have the same bridge-to-vlan
> mapping as with the previous kernel.
> But the ingress packets for WAN were going to the LAN bridge, and the
> egress packets were on the WAN bridge, but the packets never leave the
> interface.
>
> I test this with a simple config :
>  ip link add link eth0 name eth0.101 type vlan id 101
>  ip link add link eth0 name eth0.102 type vlan id 102
>  ip link set eth0.101 up
>  ip link set eth0.102 up
>  ip link add br0 type bridge
>   ip link add br1 type bridge
>   ip link set lan1 master br1
>   ip link set lan2 master br1
>   ip link set lan3 master br1
>   ip link set wan master br0
>   bridge vlan add vid 101 dev lan1 pvid untagged
>   bridge vlan add vid 101 dev lan2 pvid untagged
>   bridge vlan add vid 101 dev lan3 pvid untagged
>   bridge vlan add vid 102 dev wan pvid untagged
>   bridge vlan del vid 1 dev wan
>   bridge vlan del vid 1 dev lan1
>   bridge vlan del vid 1 dev lan2
>   bridge vlan del vid 1 dev lan3
>   ip link set eth0.101 master br1
>   ip link set eth0.102 master br0
>   bridge vlan del vid 1 dev eth0.102
>  bridge vlan del vid 1 dev eth0.101
>   bridge vlan add vid 102 dev eth0.102 pvid
>   bridge vlan add vid 101 dev eth0.101 pvid
>   ifconfig br0 up
>   ifconfig br1 up
>   ifconfig wan up
>   ifconfig lan1 up
>   ifconfig lan2 up
>   ifconfig lan3 up
>
> I do not see any packets with a tag on eth0
> ~# bridge vlan
> port    vlan ids
> wan      102 PVID Egress Untagged
> lan3     101 PVID Egress Untagged
> lan2     101 PVID Egress Untagged
> lan1     101 PVID Egress Untagged
> eth0.101         101 PVID
> eth0.102         102 PVID
> br0      1 PVID Egress Untagged
> br1      1 PVID Egress Untagged
>
> These are the loaded modules:
> # lsmod
> Module                  Size  Used by
> b53_mdio               16384  0
> b53_mmap               16384  0
> b53_common             28672  2 b53_mdio,b53_mmap
> tag_8021q              16384  0
> dsa_core               32768  9 b53_mdio,b53_common,b53_mmap,tag_8021q
> phylink                20480  2 b53_common,dsa_core
>
> if i re config
> #bridge vlan add vid 102 dev wan pvid untagged
> #bridge vlan add vid 102 dev eth0.102 pvid
> Then i see the tags for ingress packets . but no packets are
> transmitted out on the wire , but the stats in ifconfig show as
> transmitted .
> # ifconfig br0
> br0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
>         inet 10.17.33.137  netmask 255.255.255.0  broadcast 10.17.33.255
>         inet6 fe80::3ef8:4aff:fe9c:5a04  prefixlen 64  scopeid 0x20<link>
>         ether 3c:f8:4a:9c:5a:04  txqueuelen 1000  (Ethernet)
>         RX packets 616  bytes 32351 (31.5 KiB)
>         RX errors 0  dropped 0  overruns 0  frame 0
>         TX packets 679  bytes 30286 (29.5 KiB)
>         TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
>
> #ifconfig eth0
> eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
>         inet6 fe80::d6:5ff:fec2:93af  prefixlen 64  scopeid 0x20<link>
>         ether 02:d6:05:c2:93:af  txqueuelen 1000  (Ethernet)
>         RX packets 58017  bytes 4004093 (3.8 MiB)
>         RX errors 0  dropped 0  overruns 0  frame 0
>         TX packets 4322  bytes 301365 (294.3 KiB)
>         TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
>         device interrupt 56
>
> Can someone shed some light on this config?
> -Anand

^ permalink raw reply

* [PATCH net-next 0/4] nfp: Offload MPLS actions
From: John Hurley @ 2019-07-23 14:33 UTC (permalink / raw)
  To: netdev; +Cc: davem, simon.horman, jakub.kicinski, oss-drivers, John Hurley

The module act_mpls has recently been added to the kernel. This allows the
manipulation of MPLS headers on packets including push, pop and modify.
Add these new actions and parameters to the intermediate representation
API for hardware offload. Follow this by implementing the offload of these
MPLS actions in the NFP driver.

John Hurley (4):
  net: sched: include mpls actions in hardware intermediate
    representation
  nfp: flower: offload MPLS push action
  nfp: flower: offload MPLS pop action
  nfp: flower: offload MPLS set action

 drivers/net/ethernet/netronome/nfp/flower/action.c | 120 +++++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |  21 ++++
 include/net/flow_offload.h                         |  19 ++++
 include/net/tc_act/tc_mpls.h                       |  75 +++++++++++++
 net/sched/cls_api.c                                |  25 +++++
 5 files changed, 260 insertions(+)

-- 
2.7.4


^ permalink raw reply

* [PATCH net-next 1/4] net: sched: include mpls actions in hardware intermediate representation
From: John Hurley @ 2019-07-23 14:33 UTC (permalink / raw)
  To: netdev; +Cc: davem, simon.horman, jakub.kicinski, oss-drivers, John Hurley
In-Reply-To: <1563892442-4654-1-git-send-email-john.hurley@netronome.com>

A recent addition to TC actions is the ability to manipulate the MPLS
headers on packets.

In preparation to offload such actions to hardware, update the IR code to
accept and prepare the new actions.

Note that no driver currently implements the MPLS dec_ttl action so this
is not included.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/net/flow_offload.h   | 19 +++++++++++
 include/net/tc_act/tc_mpls.h | 75 ++++++++++++++++++++++++++++++++++++++++++++
 net/sched/cls_api.c          | 25 +++++++++++++++
 3 files changed, 119 insertions(+)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index b16d216..00b9aab 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -131,6 +131,9 @@ enum flow_action_id {
 	FLOW_ACTION_SAMPLE,
 	FLOW_ACTION_POLICE,
 	FLOW_ACTION_CT,
+	FLOW_ACTION_MPLS_PUSH,
+	FLOW_ACTION_MPLS_POP,
+	FLOW_ACTION_MPLS_MANGLE,
 };
 
 /* This is mirroring enum pedit_header_type definition for easy mapping between
@@ -184,6 +187,22 @@ struct flow_action_entry {
 			int action;
 			u16 zone;
 		} ct;
+		struct {				/* FLOW_ACTION_MPLS_PUSH */
+			u32		label;
+			__be16		proto;
+			u8		tc;
+			u8		bos;
+			u8		ttl;
+		} mpls_push;
+		struct {				/* FLOW_ACTION_MPLS_POP */
+			__be16		proto;
+		} mpls_pop;
+		struct {				/* FLOW_ACTION_MPLS_MANGLE */
+			u32		label;
+			u8		tc;
+			u8		bos;
+			u8		ttl;
+		} mpls_mangle;
 	};
 };
 
diff --git a/include/net/tc_act/tc_mpls.h b/include/net/tc_act/tc_mpls.h
index 4bc3d92..721de4f 100644
--- a/include/net/tc_act/tc_mpls.h
+++ b/include/net/tc_act/tc_mpls.h
@@ -27,4 +27,79 @@ struct tcf_mpls {
 };
 #define to_mpls(a) ((struct tcf_mpls *)a)
 
+static inline bool is_tcf_mpls(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (a->ops && a->ops->id == TCA_ID_MPLS)
+		return true;
+#endif
+	return false;
+}
+
+static inline u32 tcf_mpls_action(const struct tc_action *a)
+{
+	u32 tcfm_action;
+
+	rcu_read_lock();
+	tcfm_action = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_action;
+	rcu_read_unlock();
+
+	return tcfm_action;
+}
+
+static inline __be16 tcf_mpls_proto(const struct tc_action *a)
+{
+	__be16 tcfm_proto;
+
+	rcu_read_lock();
+	tcfm_proto = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_proto;
+	rcu_read_unlock();
+
+	return tcfm_proto;
+}
+
+static inline u32 tcf_mpls_label(const struct tc_action *a)
+{
+	u32 tcfm_label;
+
+	rcu_read_lock();
+	tcfm_label = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_label;
+	rcu_read_unlock();
+
+	return tcfm_label;
+}
+
+static inline u8 tcf_mpls_tc(const struct tc_action *a)
+{
+	u8 tcfm_tc;
+
+	rcu_read_lock();
+	tcfm_tc = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_tc;
+	rcu_read_unlock();
+
+	return tcfm_tc;
+}
+
+static inline u8 tcf_mpls_bos(const struct tc_action *a)
+{
+	u8 tcfm_bos;
+
+	rcu_read_lock();
+	tcfm_bos = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_bos;
+	rcu_read_unlock();
+
+	return tcfm_bos;
+}
+
+static inline u8 tcf_mpls_ttl(const struct tc_action *a)
+{
+	u8 tcfm_ttl;
+
+	rcu_read_lock();
+	tcfm_ttl = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_ttl;
+	rcu_read_unlock();
+
+	return tcfm_ttl;
+}
+
 #endif /* __NET_TC_MPLS_H */
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index efd3cfb..3565d9a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -36,6 +36,7 @@
 #include <net/tc_act/tc_sample.h>
 #include <net/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_ct.h>
+#include <net/tc_act/tc_mpls.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
 
@@ -3269,6 +3270,30 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			entry->id = FLOW_ACTION_CT;
 			entry->ct.action = tcf_ct_action(act);
 			entry->ct.zone = tcf_ct_zone(act);
+		} else if (is_tcf_mpls(act)) {
+			switch (tcf_mpls_action(act)) {
+			case TCA_MPLS_ACT_PUSH:
+				entry->id = FLOW_ACTION_MPLS_PUSH;
+				entry->mpls_push.proto = tcf_mpls_proto(act);
+				entry->mpls_push.label = tcf_mpls_label(act);
+				entry->mpls_push.tc = tcf_mpls_tc(act);
+				entry->mpls_push.bos = tcf_mpls_bos(act);
+				entry->mpls_push.ttl = tcf_mpls_ttl(act);
+				break;
+			case TCA_MPLS_ACT_POP:
+				entry->id = FLOW_ACTION_MPLS_POP;
+				entry->mpls_pop.proto = tcf_mpls_proto(act);
+				break;
+			case TCA_MPLS_ACT_MODIFY:
+				entry->id = FLOW_ACTION_MPLS_MANGLE;
+				entry->mpls_mangle.label = tcf_mpls_label(act);
+				entry->mpls_mangle.tc = tcf_mpls_tc(act);
+				entry->mpls_mangle.bos = tcf_mpls_bos(act);
+				entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
+				break;
+			default:
+				goto err_out;
+			}
 		} else {
 			goto err_out;
 		}
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 2/4] nfp: flower: offload MPLS push action
From: John Hurley @ 2019-07-23 14:34 UTC (permalink / raw)
  To: netdev; +Cc: davem, simon.horman, jakub.kicinski, oss-drivers, John Hurley
In-Reply-To: <1563892442-4654-1-git-send-email-john.hurley@netronome.com>

Recent additions to the kernel include a TC action module to manipulate
MPLS headers on packets. Such actions are available to offload via the
flow_offload intermediate representation API.

Modify the NFP driver to allow the offload of MPLS push actions to
firmware.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/action.c | 50 ++++++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |  7 +++
 2 files changed, 57 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 5a54fe8..9e18bec 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -2,10 +2,12 @@
 /* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
 #include <linux/bitfield.h>
+#include <linux/mpls.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_csum.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_mpls.h>
 #include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_tunnel_key.h>
@@ -25,6 +27,38 @@
 						 NFP_FL_TUNNEL_KEY | \
 						 NFP_FL_TUNNEL_GENEVE_OPT)
 
+static int
+nfp_fl_push_mpls(struct nfp_fl_push_mpls *push_mpls,
+		 const struct flow_action_entry *act,
+		 struct netlink_ext_ack *extack)
+{
+	size_t act_size = sizeof(struct nfp_fl_push_mpls);
+	u32 mpls_lse = 0;
+
+	push_mpls->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_MPLS;
+	push_mpls->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+
+	/* BOS is optional in the TC action but required for offload. */
+	if (act->mpls_push.bos != ACT_MPLS_BOS_NOT_SET) {
+		mpls_lse |= act->mpls_push.bos << MPLS_LS_S_SHIFT;
+	} else {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: BOS field must explicitly be set for MPLS push");
+		return -EOPNOTSUPP;
+	}
+
+	/* Leave MPLS TC as a default value of 0 if not explicitly set. */
+	if (act->mpls_push.tc != ACT_MPLS_TC_NOT_SET)
+		mpls_lse |= act->mpls_push.tc << MPLS_LS_TC_SHIFT;
+
+	/* Proto, label and TTL are enforced and verified for MPLS push. */
+	mpls_lse |= act->mpls_push.label << MPLS_LS_LABEL_SHIFT;
+	mpls_lse |= act->mpls_push.ttl << MPLS_LS_TTL_SHIFT;
+	push_mpls->ethtype = act->mpls_push.proto;
+	push_mpls->lse = cpu_to_be32(mpls_lse);
+
+	return 0;
+}
+
 static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
 {
 	size_t act_size = sizeof(struct nfp_fl_pop_vlan);
@@ -869,6 +903,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
 	struct nfp_fl_set_ipv4_tun *set_tun;
 	struct nfp_fl_pre_tunnel *pre_tun;
 	struct nfp_fl_push_vlan *psh_v;
+	struct nfp_fl_push_mpls *psh_m;
 	struct nfp_fl_pop_vlan *pop_v;
 	int err;
 
@@ -975,6 +1010,21 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
 		 */
 		*csum_updated &= ~act->csum_flags;
 		break;
+	case FLOW_ACTION_MPLS_PUSH:
+		if (*a_len +
+		    sizeof(struct nfp_fl_push_mpls) > NFP_FL_MAX_A_SIZ) {
+			NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push MPLS");
+			return -EOPNOTSUPP;
+		}
+
+		psh_m = (struct nfp_fl_push_mpls *)&nfp_fl->action_data[*a_len];
+		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+		err = nfp_fl_push_mpls(psh_m, act, extack);
+		if (err)
+			return err;
+		*a_len += sizeof(struct nfp_fl_push_mpls);
+		break;
 	default:
 		/* Currently we do not handle any other actions. */
 		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported action in action list");
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 0f1706a..91af0fa 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -68,6 +68,7 @@
 #define NFP_FL_ACTION_OPCODE_OUTPUT		0
 #define NFP_FL_ACTION_OPCODE_PUSH_VLAN		1
 #define NFP_FL_ACTION_OPCODE_POP_VLAN		2
+#define NFP_FL_ACTION_OPCODE_PUSH_MPLS		3
 #define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL	6
 #define NFP_FL_ACTION_OPCODE_SET_ETHERNET	7
 #define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS	9
@@ -232,6 +233,12 @@ struct nfp_fl_push_geneve {
 	u8 opt_data[];
 };
 
+struct nfp_fl_push_mpls {
+	struct nfp_fl_act_head head;
+	__be16 ethtype;
+	__be32 lse;
+};
+
 /* Metadata with L2 (1W/4B)
  * ----------------------------------------------------------------
  *    3                   2                   1
-- 
2.7.4


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox