Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCHv2 2/5] sh_eth: enable wake-on-lan for Gen2 devices
From: Niklas Söderlund @ 2016-12-12 16:09 UTC (permalink / raw)
  To: Sergei Shtylyov, Simon Horman, netdev, linux-renesas-soc
  Cc: Geert Uytterhoeven, Niklas Söderlund
In-Reply-To: <20161212160931.6478-1-niklas.soderlund+renesas@ragnatech.se>

Tested on Gen2 r8a7791/Koelsch.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
---
 drivers/net/ethernet/renesas/sh_eth.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 87640b9..348ed22 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -624,8 +624,9 @@ static struct sh_eth_cpu_data r8a779x_data = {
 
 	.register_type	= SH_ETH_REG_FAST_RCAR,
 
-	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
-	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
+	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD | ECSR_MPD,
+	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP |
+			  ECSIPR_MPDIP,
 	.eesipr_value	= 0x01ff009f,
 
 	.tx_check	= EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO,
@@ -641,6 +642,7 @@ static struct sh_eth_cpu_data r8a779x_data = {
 	.tpauser	= 1,
 	.hw_swap	= 1,
 	.rmiimode	= 1,
+	.magic		= 1,
 };
 #endif /* CONFIG_OF */
 
-- 
2.10.2

^ permalink raw reply related

* [PATCHv2 1/5] sh_eth: add generic wake-on-lan support via magic packet
From: Niklas Söderlund @ 2016-12-12 16:09 UTC (permalink / raw)
  To: Sergei Shtylyov, Simon Horman, netdev, linux-renesas-soc
  Cc: Geert Uytterhoeven, Niklas Söderlund
In-Reply-To: <20161212160931.6478-1-niklas.soderlund+renesas@ragnatech.se>

Add generic functionality to support Wake-on-Lan using MagicPacket which
are supported by at least a few versions of sh_eth. Only add
functionality for WoL, no specific sh_eth version are marked to support
WoL yet.

WoL is enabled in the suspend callback by setting MagicPacket detection
and disabling all interrupts expect MagicPacket. In the resume path the
driver needs to reset the hardware to rearm the WoL logic, this prevents
the driver from simply restoring the registers and to take advantage of
that sh_eth was not suspended to reduce resume time. To reset the
hardware the driver close and reopens the device just like it would do
in a normal suspend/resume scenario without WoL enabled, but it both
close and open the device in the resume callback since the device needs
to be open for WoL to work.

One quirk needed for WoL is that the module clock needs to be prevented
from being switched off by Runtime PM. To keep the clock alive the
suspend callback need to call clk_enable() directly to increase the
usage count of the clock. Then when Runtime PM decreases the clock usage
count it won't reach 0 and be switched off.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
---
 drivers/net/ethernet/renesas/sh_eth.c | 112 +++++++++++++++++++++++++++++++---
 drivers/net/ethernet/renesas/sh_eth.h |   3 +
 2 files changed, 107 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 05b0dc5..87640b9 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2199,6 +2199,33 @@ static int sh_eth_set_ringparam(struct net_device *ndev,
 	return 0;
 }
 
+static void sh_eth_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+
+	wol->supported = 0;
+	wol->wolopts = 0;
+
+	if (mdp->cd->magic && mdp->clk) {
+		wol->supported = WAKE_MAGIC;
+		wol->wolopts = mdp->wol_enabled ? WAKE_MAGIC : 0;
+	}
+}
+
+static int sh_eth_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+
+	if (!mdp->cd->magic || !mdp->clk || wol->wolopts & ~WAKE_MAGIC)
+		return -EOPNOTSUPP;
+
+	mdp->wol_enabled = !!(wol->wolopts & WAKE_MAGIC);
+
+	device_set_wakeup_enable(&mdp->pdev->dev, mdp->wol_enabled);
+
+	return 0;
+}
+
 static const struct ethtool_ops sh_eth_ethtool_ops = {
 	.get_regs_len	= sh_eth_get_regs_len,
 	.get_regs	= sh_eth_get_regs,
@@ -2213,6 +2240,8 @@ static const struct ethtool_ops sh_eth_ethtool_ops = {
 	.set_ringparam	= sh_eth_set_ringparam,
 	.get_link_ksettings = sh_eth_get_link_ksettings,
 	.set_link_ksettings = sh_eth_set_link_ksettings,
+	.get_wol	= sh_eth_get_wol,
+	.set_wol	= sh_eth_set_wol,
 };
 
 /* network device open function */
@@ -3017,6 +3046,11 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 		goto out_release;
 	}
 
+	/* Get clock, if not found that's OK but Wake-On-Lan is unavailable */
+	mdp->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(mdp->clk))
+		mdp->clk = NULL;
+
 	ndev->base_addr = res->start;
 
 	spin_lock_init(&mdp->lock);
@@ -3111,6 +3145,9 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	if (ret)
 		goto out_napi_del;
 
+	if (mdp->cd->magic && mdp->clk)
+		device_set_wakeup_capable(&pdev->dev, 1);
+
 	/* print device information */
 	netdev_info(ndev, "Base address at 0x%x, %pM, IRQ %d.\n",
 		    (u32)ndev->base_addr, ndev->dev_addr, ndev->irq);
@@ -3150,15 +3187,67 @@ static int sh_eth_drv_remove(struct platform_device *pdev)
 
 #ifdef CONFIG_PM
 #ifdef CONFIG_PM_SLEEP
+static int sh_eth_wol_setup(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+
+	/* Only allow ECI interrupts */
+	synchronize_irq(ndev->irq);
+	napi_disable(&mdp->napi);
+	sh_eth_write(ndev, DMAC_M_ECI, EESIPR);
+
+	/* Enable MagicPacket */
+	sh_eth_modify(ndev, ECMR, 0, ECMR_PMDE);
+
+	/* Increased clock usage so device won't be suspended */
+	clk_enable(mdp->clk);
+
+	return enable_irq_wake(ndev->irq);
+}
+
+static int sh_eth_wol_restore(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+	int ret;
+
+	napi_enable(&mdp->napi);
+
+	/* Disable MagicPacket */
+	sh_eth_modify(ndev, ECMR, ECMR_PMDE, 0);
+
+	/* The device need to be reset to restore MagicPacket logic
+	 * for next wakeup. If we close and open the device it will
+	 * both be reset and all registers restored. This is what
+	 * happens during suspend and resume without WoL enabled.
+	 */
+	ret = sh_eth_close(ndev);
+	if (ret < 0)
+		return ret;
+	ret = sh_eth_open(ndev);
+	if (ret < 0)
+		return ret;
+
+	/* Restore clock usage count */
+	clk_disable(mdp->clk);
+
+	return disable_irq_wake(ndev->irq);
+}
+
 static int sh_eth_suspend(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
+	struct sh_eth_private *mdp = netdev_priv(ndev);
 	int ret = 0;
 
-	if (netif_running(ndev)) {
-		netif_device_detach(ndev);
+	if (!netif_running(ndev))
+		return 0;
+
+	netif_device_detach(ndev);
+
+	if (mdp->wol_enabled)
+		ret = sh_eth_wol_setup(ndev);
+	else
 		ret = sh_eth_close(ndev);
-	}
 
 	return ret;
 }
@@ -3166,14 +3255,21 @@ static int sh_eth_suspend(struct device *dev)
 static int sh_eth_resume(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
+	struct sh_eth_private *mdp = netdev_priv(ndev);
 	int ret = 0;
 
-	if (netif_running(ndev)) {
+	if (!netif_running(ndev))
+		return 0;
+
+	if (mdp->wol_enabled)
+		ret = sh_eth_wol_restore(ndev);
+	else
 		ret = sh_eth_open(ndev);
-		if (ret < 0)
-			return ret;
-		netif_device_attach(ndev);
-	}
+
+	if (ret < 0)
+		return ret;
+
+	netif_device_attach(ndev);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index d050f37..4ceed00 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -493,6 +493,7 @@ struct sh_eth_cpu_data {
 	unsigned shift_rd0:1;	/* shift Rx descriptor word 0 right by 16 */
 	unsigned rmiimode:1;	/* EtherC has RMIIMODE register */
 	unsigned rtrate:1;	/* EtherC has RTRATE register */
+	unsigned magic:1;	/* EtherC has ECMR.PMDE and ECSR.MPD */
 };
 
 struct sh_eth_private {
@@ -501,6 +502,7 @@ struct sh_eth_private {
 	const u16 *reg_offset;
 	void __iomem *addr;
 	void __iomem *tsu_addr;
+	struct clk *clk;
 	u32 num_rx_ring;
 	u32 num_tx_ring;
 	dma_addr_t rx_desc_dma;
@@ -529,6 +531,7 @@ struct sh_eth_private {
 	unsigned no_ether_link:1;
 	unsigned ether_link_active_low:1;
 	unsigned is_opened:1;
+	unsigned wol_enabled:1;
 };
 
 static inline void sh_eth_soft_swap(char *src, int len)
-- 
2.10.2

^ permalink raw reply related

* Re: [PATCH net-next 1/3] net:dsa:mv88e6xxx: use hashtable to store multicast entries
From: Andrew Lunn @ 2016-12-12 16:07 UTC (permalink / raw)
  To: Volodymyr Bendiuga
  Cc: Vivien Didelot, Volodymyr Bendiuga, f.fainelli, netdev,
	Volodymyr Bendiuga
In-Reply-To: <CAMr9Lbp5eCg1oyWGN+uiDEcF0VZuKUi87FH6JYTGj6pL82R+Mw@mail.gmail.com>

On Mon, Dec 12, 2016 at 04:22:13PM +0100, Volodymyr Bendiuga wrote:
> Hi,
> 
> I apologise for incorrectly formatted patch, I will fix and resend it.

Please don't resend with just the white space fixed. The memory model
is very important, using the fdb_prepare() call to allocate memory,
etc.

   Andrew

^ permalink raw reply

* Re: [PATCH] sh_eth: add wake-on-lan support via magic packet
From: Niklas Söderlund @ 2016-12-12 15:49 UTC (permalink / raw)
  To: Sergei Shtylyov; +Cc: Simon Horman, netdev, linux-renesas-soc
In-Reply-To: <b0bc6386-45ba-2205-6cd4-dce9a1529124@cogentembedded.com>

Hi Sergei,

Thanks for your feedback.

On 2016-12-11 00:25:41 +0300, Sergei Shtylyov wrote:
> Hello!
> 
> On 12/08/2016 05:56 PM, Niklas Söderlund wrote:
> 
> > >   You only enable the WOL support fo the R-Car gen2 chips but never say that
> > > explicitly, neither in the subject nor here.
> > > 
> > > > Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
> > > > ---
> > > >  drivers/net/ethernet/renesas/sh_eth.c | 120 +++++++++++++++++++++++++++++++---
> > > >  drivers/net/ethernet/renesas/sh_eth.h |   4 ++
> > > >  2 files changed, 116 insertions(+), 8 deletions(-)
> > > 
> > > > diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
> > > > index 05b0dc5..3974046 100644
> > > > --- a/drivers/net/ethernet/renesas/sh_eth.c
> > > > +++ b/drivers/net/ethernet/renesas/sh_eth.c
> [...]
> > > > @@ -1657,6 +1658,10 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev)
> > > >  		goto out;
> > > > 
> > > >  	if (!likely(mdp->irq_enabled)) {
> > > 
> > >    Oops, I guess unlikely(!mdp->irq_enabled) was meant here...
> > 
> > I can correct this in a separate patch if you wish.
> 
>    I'll look into this myself, I think.

OK.

> 
> > > +		/* Handle MagicPacket interrupt */
> > > +		if (sh_eth_read(ndev, ECSR) & ECSR_MPD)
> 
>    What if it wasn't enabled ATM?

Sorry I don't understand this comment.

> 
> [...]
> > > > @@ -3111,6 +3150,10 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
> > > >  	if (ret)
> > > >  		goto out_napi_del;
> > > > 
> > > > +	mdp->wol_enabled = false;
> > > 
> > >    No need, the '*mdp' was kzalloc'ed.
> > 
> > OK, i prefer to explicitly set for easier reading of the code. But if
> > you wish I will remove this in v2.
> 
>    Yes, remove it please.

Will remove for v2.

> 
> > > > @@ -3150,15 +3193,71 @@ static int sh_eth_drv_remove(struct platform_device *pdev)
> > > > 
> > > >  #ifdef CONFIG_PM
> > > >  #ifdef CONFIG_PM_SLEEP
> > > > +static int sh_eth_wol_setup(struct net_device *ndev)
> > > > +{
> > > > +	struct sh_eth_private *mdp = netdev_priv(ndev);
> > > > +
> > > > +	/* Only allow ECI interrupts */
> > > > +	mdp->irq_enabled = false;
> > > 
> > >    Why 'false' if you enable IRQs below?
> > 
> > I mask all interrupts except MagicPacket (ECSIPR_MPDIP) interrupts form
> > the ECI (DMAC_M_ECI) and by setting irq_enabled to false the interrupt
> > handler will only ack any residue interrupt.
> 
>    I don't see where it ack's anything, it just clears EESIPR and returns in
> this case.

You are right, I must have misread when looking at this.

> 
> > This is how it's done in
> > other parts of the driver when disabling interrupts.
> 
>    Not in all parts of the driver that disable EESIPR interrupts... I must
> confess that I never liked that 'mdp->irq_enabled' flag and still suspect we
> can get things done without it... I need to look at this code again, sigh...
> 
> > This is also why I only check for MagicPacket interrupts if irq_enabled
> > is false.
> 
>   I would have preferred that this was done with the other EMAC interrupts,
> in sh_eth_error().

I removed the check for Magic Packet in sh_eth_interrupt() and running 
without setting mdp->irq_enabled = false. sh_eth_error() will then clear 
any ECI interrupt so no need to add Magic Packet detection to it since 
all that is needed on Magic Packet is to clear the interrupt which 
already is done. This works and I can do multiple suspend/resume cycles, 
will be in v2 thanks for the suggestion.

> 
> > > > +	synchronize_irq(ndev->irq);
> > > > +	napi_disable(&mdp->napi);
> > > > +	sh_eth_write(ndev, DMAC_M_ECI, EESIPR);
> > > > +
> > > > +	/* Enable ECI MagicPacket interrupt */
> > > > +	sh_eth_write(ndev, ECSIPR_MPDIP, ECSIPR);
> 
>    I'd prefer if it was always enabled via 'ecsipr_value'.

Will do so in v2.

> 
> > > > +
> > > > +	/* Enable MagicPacket */
> > > > +	sh_eth_modify(ndev, ECMR, 0, ECMR_PMDE);
> > > > +
> > > > +	/* Increased clock usage so device won't be suspended */
> > > > +	clk_enable(mdp->clk);
> > > 
> > >    Hum, intermixiggn runtime PM with clock API doesn't look good...
> > 
> > I agree it looks weird but I need a way to increment the usage count for
> > the clock otherwise the PM code will disable the module clock and WoL
> > will not work.
> 
>    How will it do it if you don't call sh_eth_close() in this case?
> 
> > Note that this call will not enable the clock just
> > increase the usage count so it won't be disabled when the PM code
> > decrease it after the sh_eth suspend function is run.
> 
>    You mean that the PM code calls RPM or clk API on its own? That's strange...

Yes it calls clk API.

> 
> > If you know of a different way of ensuring that the clock is not turned
> > off I be happy to look at it. I did some investigation into this and
> > calling clk_enable() directly is for example what happens in the
> > enable_irq_wake() call path to ensure the clock for the irq_chip is not
> > turned off if it is a wakeup source, se for example
> > gpio_rcar_irq_set_wake() in drivers/gpio/gpio-rcar.c.
> 
>    Thanks, will look into it...
> 
> [...]
> 
> MBR, Sergei
> 

-- 
Regards,
Niklas Söderlund

^ permalink raw reply

* Re: [patch net-next] irda: w83977af_ir: cleanup an indent issue
From: Joe Perches @ 2016-12-12 15:22 UTC (permalink / raw)
  To: Dan Carpenter, Samuel Ortiz; +Cc: netdev, kernel-janitors
In-Reply-To: <20161212112134.GA10035@elgon.mountain>

On Mon, 2016-12-12 at 14:21 +0300, Dan Carpenter wrote:
> In commit 99d8d2159d7c ("irda: w83977af_ir: Neaten logging"), we
> accidentally added an extra tab to these lines.

Thanks Dan.

Oops,  It's kinda funny I did the whole
series to fix that misindentation originally
and added it back when rebasing.

^ permalink raw reply

* Re: Designing a safe RX-zero-copy Memory Model for Networking
From: Jesper Dangaard Brouer @ 2016-12-12 15:10 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: netdev@vger.kernel.org, linux-mm, John Fastabend,
	Willem de Bruijn, Björn Töpel, Karlsson, Magnus,
	Alexander Duyck, Mel Gorman, Tom Herbert, Brenden Blanco,
	Tariq Toukan, Saeed Mahameed, Jesse Brandeburg, Kalman Meth,
	brouer
In-Reply-To: <20161212141433.GB19987@rapoport-lnx>

On Mon, 12 Dec 2016 16:14:33 +0200
Mike Rapoport <rppt@linux.vnet.ibm.com> wrote:

> On Mon, Dec 12, 2016 at 10:40:42AM +0100, Jesper Dangaard Brouer wrote:
> > 
> > On Mon, 12 Dec 2016 10:38:13 +0200 Mike Rapoport <rppt@linux.vnet.ibm.com> wrote:
> >   
> > > Hello Jesper,
> > > 
> > > On Mon, Dec 05, 2016 at 03:31:32PM +0100, Jesper Dangaard Brouer wrote:  
> > > > Hi all,
> > > > 
> > > > This is my design for how to safely handle RX zero-copy in the network
> > > > stack, by using page_pool[1] and modifying NIC drivers.  Safely means
> > > > not leaking kernel info in pages mapped to userspace and resilience
> > > > so a malicious userspace app cannot crash the kernel.
> > > > 
> > > > Design target
> > > > =============
> > > > 
> > > > Allow the NIC to function as a normal Linux NIC and be shared in a
> > > > safe manor, between the kernel network stack and an accelerated
> > > > userspace application using RX zero-copy delivery.
> > > > 
> > > > Target is to provide the basis for building RX zero-copy solutions in
> > > > a memory safe manor.  An efficient communication channel for userspace
> > > > delivery is out of scope for this document, but OOM considerations are
> > > > discussed below (`Userspace delivery and OOM`_).    
> > > 
> > > Sorry, if this reply is a bit off-topic.  
> > 
> > It is very much on topic IMHO :-)
> >   
> > > I'm working on implementation of RX zero-copy for virtio and I've dedicated
> > > some thought about making guest memory available for physical NIC DMAs.
> > > I believe this is quite related to your page_pool proposal, at least from
> > > the NIC driver perspective, so I'd like to share some thoughts here.  
> > 
> > Seems quite related. I'm very interested in cooperating with you! I'm
> > not very familiar with virtio, and how packets/pages gets channeled
> > into virtio.  
> 
> They are copied :-)
> Presuming we are dealing only with vhost backend, the received skb
> eventually gets converted to IOVs, which in turn are copied to the guest
> memory. The IOVs point to the guest memory that is allocated by virtio-net
> running in the guest.

Thanks for explaining that. It seems like a lot of overhead. I have to
wrap my head around this... so, the hardware NIC is receiving the
packet/page, in the RX ring, and after converting it to IOVs, it is
conceptually transmitted into the guest, and then the guest-side have a
RX-function to handle this packet. Correctly understood?

 
> > > The idea is to dedicate one (or more) of the NIC's queues to a VM, e.g.
> > > using macvtap, and then propagate guest RX memory allocations to the NIC
> > > using something like new .ndo_set_rx_buffers method.  
> > 
> > I believe the page_pool API/design aligns with this idea/use-case.
> >   
> > > What is your view about interface between the page_pool and the NIC
> > > drivers?  
> > 
> > In my Prove-of-Concept implementation, the NIC driver (mlx5) register
> > a page_pool per RX queue.  This is done for two reasons (1) performance
> > and (2) for supporting use-cases where only one single RX-ring queue is
> > (re)configured to support RX-zero-copy.  There are some associated
> > extra cost of enabling this mode, thus it makes sense to only enable it
> > when needed.
> > 
> > I've not decided how this gets enabled, maybe some new driver NDO.  It
> > could also happen when a XDP program gets loaded, which request this
> > feature.
> > 
> > The macvtap solution is nice and we should support it, but it requires
> > VM to have their MAC-addr registered on the physical switch.  This
> > design is about adding flexibility. Registering an XDP eBPF filter
> > provides the maximum flexibility for matching the destination VM.  
> 
> I'm not very familiar with XDP eBPF, and it's difficult for me to estimate
> what needs to be done in BPF program to do proper conversion of skb to the
> virtio descriptors.

XDP is a step _before_ the SKB is allocated.  The XDP eBPF program can
modify the packet-page data, but I don't think it is needed for your
use-case.  View XDP (primarily) as an early (demux) filter.

XDP is missing a feature your need, which is TX packet into another
net_device (I actually imagine a port mapping table, that point to a
net_device).  This require a new "TX-raw" NDO that takes a page (+
offset and length). 

I imagine, the virtio driver (virtio_net or a new driver?) getting
extended with this new "TX-raw" NDO, that takes "raw" packet-pages.
 Whether zero-copy is possible is determined by checking if page
originates from a page_pool that have enabled zero-copy (and likely
matching against a "protection domain" id number).


> We were not considered using XDP yet, so we've decided to limit the initial
> implementation to macvtap because we can ensure correspondence between a
> NIC queue and virtual NIC, which is not the case with more generic tap
> device. It could be that use of XDP will allow for a generic solution for
> virtio case as well.

You don't need an XDP filter, if you can make the HW do the early demux
binding into a queue.  The check for if memory is zero-copy enabled
would be the same.

> >   
> > > Have you considered using "push" model for setting the NIC's RX memory?  
> > 
> > I don't understand what you mean by a "push" model?  
> 
> Currently, memory allocation in NIC drivers boils down to alloc_page with
> some wrapping code. I see two possible ways to make NIC use of some
> preallocated pages: either NIC driver will call an API (probably different
> from alloc_page) to obtain that memory, or there will be NDO API that
> allows to set the NIC's RX buffers. I named the later case "push".

As you might have guessed, I'm not into the "push" model, because this
means I cannot share the queue with the normal network stack.  Which I
believe is possible as outlined (in email and [2]) and can be done with
out HW filter features (like macvlan).

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

[1] https://prototype-kernel.readthedocs.io/en/latest/networking/XDP/index.html
[2] https://prototype-kernel.readthedocs.io/en/latest/vm/page_pool/design/memory_model_nic.html

^ permalink raw reply

* Re: fib_frontend: Add network specific broadcasts, when it takes a sense
From: David Miller @ 2016-12-12 15:03 UTC (permalink / raw)
  To: jbenc; +Cc: brandon.philips, netdev, tom, aaron.levy, bison
In-Reply-To: <20161212153035.4a751ccc@griffin>

From: Jiri Benc <jbenc@redhat.com>
Date: Mon, 12 Dec 2016 15:30:35 +0100

> On Fri, 9 Dec 2016 15:41:52 -0800, Brandon Philips wrote:
>> The issue we have: when creating the VXLAN interface and assigning it
>> an address we see a broadcast route being added by the Kernel. For
>> example if we have 10.4.0.0/16 a broadcast route to 10.4.0.0 is
>> created. This route is unwanted because we assign 10.4.0.0 to one of
>> our VXLAN interfaces.
> 
> Are you saying you're trying to assign the IP address 10.4.0.0/16 as a
> unicast address to an interface? Then you'll run into way more problems
> than the one you're describing. You can't have host part of the IP
> address consisting of all zeros (or all ones). Just don't do it. Choose
> a valid IP address instead.

Agreed.

^ permalink raw reply

* Re: [PATCH net-next 1/3] net:dsa:mv88e6xxx: use hashtable to store multicast entries
From: Vivien Didelot @ 2016-12-12 15:03 UTC (permalink / raw)
  To: Volodymyr Bendiuga, andrew, f.fainelli, netdev,
	volodymyr.bendiuga
  Cc: Volodymyr Bendiuga
In-Reply-To: <1481549958-1265-1-git-send-email-volodymyr.bendiuga@gmail.com>

Hi Volodymyr,

Volodymyr Bendiuga <volodymyr.bendiuga@gmail.com> writes:

> Hashtable will make it extremely faster when inserting fdb entries
> into the forwarding database.

This is hard to follow. As Andrew correctly mentioned, when you have two
or more patches, please format them with --cover-letter, describe them
in patch 0 and send them all together, so that we get a single thread.

You are correct about the ATU get operations being slow. However, we
intentionally keep the driver stateless at the moment to keep it simple.

Unless speed is an issue to fix here, I am quite reluctant to add
caching to this driver yet. What is the issue you are having with the
ATU being slow?

Thanks,

	Vivien

^ permalink raw reply

* Re: [PATCH v2] net: macb: Added PCI wrapper for Platform Driver.
From: kbuild test robot @ 2016-12-12 15:00 UTC (permalink / raw)
  To: Bartosz Folta
  Cc: kbuild-all, Nicolas Ferre, David S. Miller, Niklas Cassel,
	Alexandre Torgue, Satanand Burla, Raghu Vatsavayi, Simon Horman,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Bartosz Folta, Rafal Ozieblo
In-Reply-To: <SN1PR0701MB1951E07687DBD871E4F1BF31CC980@SN1PR0701MB1951.namprd07.prod.outlook.com>

[-- Attachment #1: Type: text/plain, Size: 5003 bytes --]

Hi Bartosz,

[auto build test ERROR on net-next/master]
[also build test ERROR on v4.9 next-20161209]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Bartosz-Folta/net-macb-Added-PCI-wrapper-for-Platform-Driver/20161212-220228
config: blackfin-allyesconfig (attached as .config)
compiler: bfin-uclinux-gcc (GCC) 6.2.0
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=blackfin 

All error/warnings (new ones prefixed by >>):

   drivers/net/ethernet/cadence/macb_pci.c: In function 'macb_probe':
   drivers/net/ethernet/cadence/macb_pci.c:78:19: error: implicit declaration of function 'clk_register_fixed_rate' [-Werror=implicit-function-declaration]
     plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0,
                      ^~~~~~~~~~~~~~~~~~~~~~~
   drivers/net/ethernet/cadence/macb_pci.c:78:17: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
     plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0,
                    ^
   drivers/net/ethernet/cadence/macb_pci.c:85:17: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
     plat_data.hclk = clk_register_fixed_rate(&pdev->dev, "hclk", NULL, 0,
                    ^
   drivers/net/ethernet/cadence/macb_pci.c:116:2: error: implicit declaration of function 'clk_unregister' [-Werror=implicit-function-declaration]
     clk_unregister(plat_data.hclk);
     ^~~~~~~~~~~~~~
   drivers/net/ethernet/cadence/macb_pci.c: At top level:
>> drivers/net/ethernet/cadence/macb_pci.c:149:1: warning: data definition has no type or storage class
    module_pci_driver(macb_pci_driver);
    ^~~~~~~~~~~~~~~~~
>> drivers/net/ethernet/cadence/macb_pci.c:149:1: error: type defaults to 'int' in declaration of 'module_pci_driver' [-Werror=implicit-int]
>> drivers/net/ethernet/cadence/macb_pci.c:149:1: warning: parameter names (without types) in function declaration
   drivers/net/ethernet/cadence/macb_pci.c:142:26: warning: 'macb_pci_driver' defined but not used [-Wunused-variable]
    static struct pci_driver macb_pci_driver = {
                             ^~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +149 drivers/net/ethernet/cadence/macb_pci.c

    79							 GEM_PCLK_RATE);
    80		if (IS_ERR(plat_data.pclk)) {
    81			err = PTR_ERR(plat_data.pclk);
    82			goto err_pclk_register;
    83		}
    84	
  > 85		plat_data.hclk = clk_register_fixed_rate(&pdev->dev, "hclk", NULL, 0,
    86							 GEM_HCLK_RATE);
    87		if (IS_ERR(plat_data.hclk)) {
    88			err = PTR_ERR(plat_data.hclk);
    89			goto err_hclk_register;
    90		}
    91	
    92		/* set up platform device info */
    93		memset(&plat_info, 0, sizeof(plat_info));
    94		plat_info.parent = &pdev->dev;
    95		plat_info.fwnode = pdev->dev.fwnode;
    96		plat_info.name = PLAT_DRIVER_NAME;
    97		plat_info.id = pdev->devfn;
    98		plat_info.res = res;
    99		plat_info.num_res = ARRAY_SIZE(res);
   100		plat_info.data = &plat_data;
   101		plat_info.size_data = sizeof(plat_data);
   102		plat_info.dma_mask = DMA_BIT_MASK(32);
   103	
   104		/* register platform device */
   105		plat_dev = platform_device_register_full(&plat_info);
   106		if (IS_ERR(plat_dev)) {
   107			err = PTR_ERR(plat_dev);
   108			goto err_plat_dev_register;
   109		}
   110	
   111		pci_set_drvdata(pdev, plat_dev);
   112	
   113		return 0;
   114	
   115	err_plat_dev_register:
 > 116		clk_unregister(plat_data.hclk);
   117	
   118	err_hclk_register:
   119		clk_unregister(plat_data.pclk);
   120	
   121	err_pclk_register:
   122		pci_disable_device(pdev);
   123		return err;
   124	}
   125	
   126	void macb_remove(struct pci_dev *pdev)
   127	{
   128		struct platform_device *plat_dev = pci_get_drvdata(pdev);
   129		struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev);
   130	
   131		platform_device_unregister(plat_dev);
   132		pci_disable_device(pdev);
   133		clk_unregister(plat_data->pclk);
   134		clk_unregister(plat_data->hclk);
   135	}
   136	
   137	static struct pci_device_id dev_id_table[] = {
   138		{ PCI_DEVICE(CDNS_VENDOR_ID, CDNS_DEVICE_ID), },
   139		{ 0, }
   140	};
   141	
   142	static struct pci_driver macb_pci_driver = {
   143		.name     = PCI_DRIVER_NAME,
   144		.id_table = dev_id_table,
   145		.probe    = macb_probe,
   146		.remove	  = macb_remove,
   147	};
   148	
 > 149	module_pci_driver(macb_pci_driver);
   150	MODULE_DEVICE_TABLE(pci, dev_id_table);
   151	MODULE_LICENSE("GPL");
   152	MODULE_DESCRIPTION("Cadence NIC PCI wrapper");

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 42460 bytes --]

^ permalink raw reply

* Re: fib_frontend: Add network specific broadcasts, when it takes a sense
From: Jiri Benc @ 2016-12-12 14:52 UTC (permalink / raw)
  To: Brandon Philips; +Cc: netdev, Tom Denham, Aaron Levy, Brad Ison
In-Reply-To: <CAD2oYtPPsvHgitejXTMAYa8qVvkibdPruEUcv8hhhiCfgfOvvw@mail.gmail.com>

On Mon, 12 Dec 2016 09:44:54 -0500, Brandon Philips wrote:
> Regardless, it is hard to find an RFC that says "simply don't do this
> because _____". The closest I could find was RFC 922 after sending
> this which says:

https://tools.ietf.org/html/rfc1812#section-5.3.5 is probably what
you're looking for.

 Jiri

^ permalink raw reply

* Re: [PATCH v2] net: macb: Added PCI wrapper for Platform Driver.
From: kbuild test robot @ 2016-12-12 14:50 UTC (permalink / raw)
  To: Bartosz Folta
  Cc: kbuild-all, Nicolas Ferre, David S. Miller, Niklas Cassel,
	Alexandre Torgue, Satanand Burla, Raghu Vatsavayi, Simon Horman,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	Bartosz Folta, Rafal Ozieblo
In-Reply-To: <SN1PR0701MB1951E07687DBD871E4F1BF31CC980@SN1PR0701MB1951.namprd07.prod.outlook.com>

[-- Attachment #1: Type: text/plain, Size: 3609 bytes --]

Hi Bartosz,

[auto build test ERROR on net-next/master]
[also build test ERROR on v4.9 next-20161209]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Bartosz-Folta/net-macb-Added-PCI-wrapper-for-Platform-Driver/20161212-220228
config: alpha-allyesconfig (attached as .config)
compiler: alpha-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=alpha 

All error/warnings (new ones prefixed by >>):

   drivers/net/ethernet/cadence/macb_pci.c: In function 'macb_probe':
>> drivers/net/ethernet/cadence/macb_pci.c:78:19: error: implicit declaration of function 'clk_register_fixed_rate' [-Werror=implicit-function-declaration]
     plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0,
                      ^~~~~~~~~~~~~~~~~~~~~~~
>> drivers/net/ethernet/cadence/macb_pci.c:78:17: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
     plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0,
                    ^
   drivers/net/ethernet/cadence/macb_pci.c:85:17: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
     plat_data.hclk = clk_register_fixed_rate(&pdev->dev, "hclk", NULL, 0,
                    ^
>> drivers/net/ethernet/cadence/macb_pci.c:116:2: error: implicit declaration of function 'clk_unregister' [-Werror=implicit-function-declaration]
     clk_unregister(plat_data.hclk);
     ^~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/clk_register_fixed_rate +78 drivers/net/ethernet/cadence/macb_pci.c

    72			 (void *)(uintptr_t)pci_resource_start(pdev, 0));
    73	
    74		/* set up macb platform data */
    75		memset(&plat_data, 0, sizeof(plat_data));
    76	
    77		/* initialize clocks */
  > 78		plat_data.pclk = clk_register_fixed_rate(&pdev->dev, "pclk", NULL, 0,
    79							 GEM_PCLK_RATE);
    80		if (IS_ERR(plat_data.pclk)) {
    81			err = PTR_ERR(plat_data.pclk);
    82			goto err_pclk_register;
    83		}
    84	
    85		plat_data.hclk = clk_register_fixed_rate(&pdev->dev, "hclk", NULL, 0,
    86							 GEM_HCLK_RATE);
    87		if (IS_ERR(plat_data.hclk)) {
    88			err = PTR_ERR(plat_data.hclk);
    89			goto err_hclk_register;
    90		}
    91	
    92		/* set up platform device info */
    93		memset(&plat_info, 0, sizeof(plat_info));
    94		plat_info.parent = &pdev->dev;
    95		plat_info.fwnode = pdev->dev.fwnode;
    96		plat_info.name = PLAT_DRIVER_NAME;
    97		plat_info.id = pdev->devfn;
    98		plat_info.res = res;
    99		plat_info.num_res = ARRAY_SIZE(res);
   100		plat_info.data = &plat_data;
   101		plat_info.size_data = sizeof(plat_data);
   102		plat_info.dma_mask = DMA_BIT_MASK(32);
   103	
   104		/* register platform device */
   105		plat_dev = platform_device_register_full(&plat_info);
   106		if (IS_ERR(plat_dev)) {
   107			err = PTR_ERR(plat_dev);
   108			goto err_plat_dev_register;
   109		}
   110	
   111		pci_set_drvdata(pdev, plat_dev);
   112	
   113		return 0;
   114	
   115	err_plat_dev_register:
 > 116		clk_unregister(plat_data.hclk);
   117	
   118	err_hclk_register:
   119		clk_unregister(plat_data.pclk);

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 48070 bytes --]

^ permalink raw reply

* Re: Designing a safe RX-zero-copy Memory Model for Networking
From: John Fastabend @ 2016-12-12 14:49 UTC (permalink / raw)
  To: Mike Rapoport, Jesper Dangaard Brouer
  Cc: netdev@vger.kernel.org, linux-mm, Willem de Bruijn,
	Björn Töpel, Karlsson, Magnus, Alexander Duyck,
	Mel Gorman, Tom Herbert, Brenden Blanco, Tariq Toukan,
	Saeed Mahameed, Jesse Brandeburg, Kalman Meth, Vladislav Yasevich
In-Reply-To: <20161212141433.GB19987@rapoport-lnx>

On 16-12-12 06:14 AM, Mike Rapoport wrote:
> On Mon, Dec 12, 2016 at 10:40:42AM +0100, Jesper Dangaard Brouer wrote:
>>
>> On Mon, 12 Dec 2016 10:38:13 +0200 Mike Rapoport <rppt@linux.vnet.ibm.com> wrote:
>>
>>> Hello Jesper,
>>>
>>> On Mon, Dec 05, 2016 at 03:31:32PM +0100, Jesper Dangaard Brouer wrote:
>>>> Hi all,
>>>>
>>>> This is my design for how to safely handle RX zero-copy in the network
>>>> stack, by using page_pool[1] and modifying NIC drivers.  Safely means
>>>> not leaking kernel info in pages mapped to userspace and resilience
>>>> so a malicious userspace app cannot crash the kernel.
>>>>
>>>> Design target
>>>> =============
>>>>
>>>> Allow the NIC to function as a normal Linux NIC and be shared in a
>>>> safe manor, between the kernel network stack and an accelerated
>>>> userspace application using RX zero-copy delivery.
>>>>
>>>> Target is to provide the basis for building RX zero-copy solutions in
>>>> a memory safe manor.  An efficient communication channel for userspace
>>>> delivery is out of scope for this document, but OOM considerations are
>>>> discussed below (`Userspace delivery and OOM`_).  
>>>
>>> Sorry, if this reply is a bit off-topic.
>>
>> It is very much on topic IMHO :-)
>>
>>> I'm working on implementation of RX zero-copy for virtio and I've dedicated
>>> some thought about making guest memory available for physical NIC DMAs.
>>> I believe this is quite related to your page_pool proposal, at least from
>>> the NIC driver perspective, so I'd like to share some thoughts here.
>>
>> Seems quite related. I'm very interested in cooperating with you! I'm
>> not very familiar with virtio, and how packets/pages gets channeled
>> into virtio.
> 
> They are copied :-)
> Presuming we are dealing only with vhost backend, the received skb
> eventually gets converted to IOVs, which in turn are copied to the guest
> memory. The IOVs point to the guest memory that is allocated by virtio-net
> running in the guest.
> 

Great I'm also doing something similar.

My plan was to embed the zero copy as an AF_PACKET mode and then push
a AF_PACKET backend into vhost. I'll post a patch later this week.

>>> The idea is to dedicate one (or more) of the NIC's queues to a VM, e.g.
>>> using macvtap, and then propagate guest RX memory allocations to the NIC
>>> using something like new .ndo_set_rx_buffers method.
>>
>> I believe the page_pool API/design aligns with this idea/use-case.
>>
>>> What is your view about interface between the page_pool and the NIC
>>> drivers?
>>
>> In my Prove-of-Concept implementation, the NIC driver (mlx5) register
>> a page_pool per RX queue.  This is done for two reasons (1) performance
>> and (2) for supporting use-cases where only one single RX-ring queue is
>> (re)configured to support RX-zero-copy.  There are some associated
>> extra cost of enabling this mode, thus it makes sense to only enable it
>> when needed.
>>
>> I've not decided how this gets enabled, maybe some new driver NDO.  It
>> could also happen when a XDP program gets loaded, which request this
>> feature.
>>
>> The macvtap solution is nice and we should support it, but it requires
>> VM to have their MAC-addr registered on the physical switch.  This
>> design is about adding flexibility. Registering an XDP eBPF filter
>> provides the maximum flexibility for matching the destination VM.
> 
> I'm not very familiar with XDP eBPF, and it's difficult for me to estimate
> what needs to be done in BPF program to do proper conversion of skb to the
> virtio descriptors.

I don't think XDP has much to do with this code and they should be done
separately. XDP runs eBPF code on received packets after the DMA engine
has already placed the packet in memory so its too late in the process.

The other piece here is enabling XDP in vhost but that is again separate
IMO.

Notice that ixgbe supports pushing packets into a macvlan via 'tc'
traffic steering commands so even though macvlan gets an L2 address it
doesn't mean it can't use other criteria to steer traffic to it.

> 
> We were not considered using XDP yet, so we've decided to limit the initial
> implementation to macvtap because we can ensure correspondence between a
> NIC queue and virtual NIC, which is not the case with more generic tap
> device. It could be that use of XDP will allow for a generic solution for
> virtio case as well.

Interesting this was one of the original ideas behind the macvlan
offload mode. iirc Vlad also was interested in this.

I'm guessing this was used because of the ability to push macvlan onto
its own queue?

>  
>>
>>> Have you considered using "push" model for setting the NIC's RX memory?
>>
>> I don't understand what you mean by a "push" model?
> 
> Currently, memory allocation in NIC drivers boils down to alloc_page with
> some wrapping code. I see two possible ways to make NIC use of some
> preallocated pages: either NIC driver will call an API (probably different
> from alloc_page) to obtain that memory, or there will be NDO API that
> allows to set the NIC's RX buffers. I named the later case "push".

I prefer the ndo op. This matches up well with AF_PACKET model where we
have "slots" and offload is just a transparent "push" of these "slots"
to the driver. Below we have a snippet of our proposed API,

(https://patchwork.ozlabs.org/patch/396714/ note the descriptor mapping
bits will be dropped)

+ * int (*ndo_direct_qpair_page_map) (struct vm_area_struct *vma,
+ *				     struct net_device *dev)
+ *	Called to map queue pair range from split_queue_pairs into
+ *	mmap region.
+

> +
> +static int
> +ixgbe_ndo_qpair_page_map(struct vm_area_struct *vma, struct net_device *dev)
> +{
> +	struct ixgbe_adapter *adapter = netdev_priv(dev);
> +	phys_addr_t phy_addr = pci_resource_start(adapter->pdev, 0);
> +	unsigned long pfn_rx = (phy_addr + RX_DESC_ADDR_OFFSET) >> PAGE_SHIFT;
> +	unsigned long pfn_tx = (phy_addr + TX_DESC_ADDR_OFFSET) >> PAGE_SHIFT;
> +	unsigned long dummy_page_phy;
> +	pgprot_t pre_vm_page_prot;
> +	unsigned long start;
> +	unsigned int i;
> +	int err;
> +
> +	if (!dummy_page_buf) {
> +		dummy_page_buf = kzalloc(PAGE_SIZE_4K, GFP_KERNEL);
> +		if (!dummy_page_buf)
> +			return -ENOMEM;
> +
> +		for (i = 0; i < PAGE_SIZE_4K / sizeof(unsigned int); i++)
> +			dummy_page_buf[i] = 0xdeadbeef;
> +	}
> +
> +	dummy_page_phy = virt_to_phys(dummy_page_buf);
> +	pre_vm_page_prot = vma->vm_page_prot;
> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +
> +	/* assume the vm_start is 4K aligned address */
> +	for (start = vma->vm_start;
> +	     start < vma->vm_end;
> +	     start += PAGE_SIZE_4K) {
> +		if (start == vma->vm_start + RX_DESC_ADDR_OFFSET) {
> +			err = remap_pfn_range(vma, start, pfn_rx, PAGE_SIZE_4K,
> +					      vma->vm_page_prot);
> +			if (err)
> +				return -EAGAIN;
> +		} else if (start == vma->vm_start + TX_DESC_ADDR_OFFSET) {
> +			err = remap_pfn_range(vma, start, pfn_tx, PAGE_SIZE_4K,
> +					      vma->vm_page_prot);
> +			if (err)
> +				return -EAGAIN;
> +		} else {
> +			unsigned long addr = dummy_page_phy > PAGE_SHIFT;
> +
> +			err = remap_pfn_range(vma, start, addr, PAGE_SIZE_4K,
> +					      pre_vm_page_prot);
> +			if (err)
> +				return -EAGAIN;
> +		}
> +	}
> +	return 0;
> +}
> +

Any thoughts on something like the above? We could push it when net-next
opens. One piece that fits naturally into vhost/macvtap is the kicks and
queue splicing are already there so no need to implement this making the
above patch much simpler.

.John

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH perf/core] samples/bpf: Drop unnecessary build targets.
From: Arnaldo Carvalho de Melo @ 2016-12-12 14:48 UTC (permalink / raw)
  To: Joe Stringer; +Cc: linux-kernel, wangnan0, ast, daniel, netdev
In-Reply-To: <20161209175109.6779-1-joe@ovn.org>

Em Fri, Dec 09, 2016 at 09:51:09AM -0800, Joe Stringer escreveu:
> Commit f72179ef11db ("samples/bpf: Switch over to libbpf") added these
> two makefile changes that were unnecessary for switching samples to use
> libbpf. The extra make is already handled by the build dependency, and
> libbpf target doesn't build because it lacks main(). Remove these.
> 
> Reported-by: Wang Nan <wangnan0@huawei.com>
> Signed-off-by: Joe Stringer <joe@ovn.org>

Thanks, applied.

- Arnaldo

^ permalink raw reply

* Re: fib_frontend: Add network specific broadcasts, when it takes a sense
From: Brandon Philips @ 2016-12-12 14:44 UTC (permalink / raw)
  To: Jiri Benc; +Cc: netdev, Tom Denham, Aaron Levy, Brad Ison
In-Reply-To: <20161212153035.4a751ccc@griffin>

On Mon, Dec 12, 2016 at 9:30 AM, Jiri Benc <jbenc@redhat.com> wrote:
> On Fri, 9 Dec 2016 15:41:52 -0800, Brandon Philips wrote:
>> The issue we have: when creating the VXLAN interface and assigning it
>> an address we see a broadcast route being added by the Kernel. For
>> example if we have 10.4.0.0/16 a broadcast route to 10.4.0.0 is
>> created. This route is unwanted because we assign 10.4.0.0 to one of
>> our VXLAN interfaces.
>
> Are you saying you're trying to assign the IP address 10.4.0.0/16 as a
> unicast address to an interface? Then you'll run into way more problems
> than the one you're describing. You can't have host part of the IP
> address consisting of all zeros (or all ones). Just don't do it. Choose
> a valid IP address instead.

Yes, this is what we are doing; it is because of an upstream, to us,
address assignment so I will figure it out upstream.

Regardless, it is hard to find an RFC that says "simply don't do this
because _____". The closest I could find was RFC 922 after sending
this which says:

"There is probably no reason for such addresses to appear anywhere but
as the source address of an ICMP Information".

I will submit a patch that at least documents this RFC and quote.

Thanks!

Brandon

^ permalink raw reply

* Re: fib_frontend: Add network specific broadcasts, when it takes a sense
From: Jiri Benc @ 2016-12-12 14:30 UTC (permalink / raw)
  To: Brandon Philips; +Cc: netdev, Tom Denham, Aaron Levy, Brad Ison
In-Reply-To: <CAD2oYtOS3YKFhhAfXSdJUjKyAx9JpfGz-JLNvGC_mYP7zRp+Rg@mail.gmail.com>

On Fri, 9 Dec 2016 15:41:52 -0800, Brandon Philips wrote:
> The issue we have: when creating the VXLAN interface and assigning it
> an address we see a broadcast route being added by the Kernel. For
> example if we have 10.4.0.0/16 a broadcast route to 10.4.0.0 is
> created. This route is unwanted because we assign 10.4.0.0 to one of
> our VXLAN interfaces.

Are you saying you're trying to assign the IP address 10.4.0.0/16 as a
unicast address to an interface? Then you'll run into way more problems
than the one you're describing. You can't have host part of the IP
address consisting of all zeros (or all ones). Just don't do it. Choose
a valid IP address instead.

 Jiri

^ permalink raw reply

* Re: [PATCH v2 1/4] net: hix5hd2_gmac: add generic compatible string
From: Rob Herring @ 2016-12-12 14:21 UTC (permalink / raw)
  To: Dongpo Li
  Cc: Mark Rutland, Michael Turquette, Stephen Boyd, Russell King,
	Zhangfei Gao, Yisen Zhuang, salil.mehta, David Miller,
	Arnd Bergmann, Andrew Lunn, Jiancheng Xue, benjamin.chenhao,
	caizhiyong, netdev, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <584E871D.7060200@hisilicon.com>

On Mon, Dec 12, 2016 at 5:16 AM, Dongpo Li <lidongpo@hisilicon.com> wrote:
> Hi Rob,
>
> On 2016/12/10 6:35, Rob Herring wrote:
>> On Mon, Dec 05, 2016 at 09:27:58PM +0800, Dongpo Li wrote:
>>> The "hix5hd2" is SoC name, add the generic ethernet driver name.
>>> The "hisi-gemac-v1" is the basic version and "hisi-gemac-v2" adds
>>> the SG/TXCSUM/TSO/UFO features.
>>>
>>> Signed-off-by: Dongpo Li <lidongpo@hisilicon.com>
>>> ---
>>>  .../devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt    |  9 +++++++--
>>>  drivers/net/ethernet/hisilicon/hix5hd2_gmac.c             | 15 +++++++++++----
>>>  2 files changed, 18 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt
>>> index 75d398b..75920f0 100644
>>> --- a/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt
>>> +++ b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt
>>> @@ -1,7 +1,12 @@
>>>  Hisilicon hix5hd2 gmac controller
>>>
>>>  Required properties:
>>> -- compatible: should be "hisilicon,hix5hd2-gmac".
>>> +- compatible: should contain one of the following SoC strings:
>>> +    * "hisilicon,hix5hd2-gemac"
>>> +    * "hisilicon,hi3798cv200-gemac"
>>> +    and one of the following version string:
>>> +    * "hisilicon,hisi-gemac-v1"
>>> +    * "hisilicon,hisi-gemac-v2"
>>
>> What combinations are valid? I assume both chips don't have both v1 and
>> v2. 2 SoCs and 2 versions so far, I don't think there is much point to
>> have the v1 and v2 compatible strings.
>>
> The v1 and v2 are generic MAC compatible strings, many HiSilicon SoCs may
> use the same MAC version. For example,
> hix5hd2, hi3716cv200 SoCs use the v1 MAC version,
> hi3798cv200, hi3516a SoCs use the v2 MAC version,
> and there may be more SoCs added in future.
> So I think the generic compatible strings are okay here.
> Should I add the hi3716cv200, hi3516a SoCs compatible here?

Yes.

> Do you have any good advice?
>
>>>  - reg: specifies base physical address(s) and size of the device registers.
>>>    The first region is the MAC register base and size.
>>>    The second region is external interface control register.
>>> @@ -20,7 +25,7 @@ Required properties:
>>>
>>>  Example:
>>>      gmac0: ethernet@f9840000 {
>>> -            compatible = "hisilicon,hix5hd2-gmac";
>>> +            compatible = "hisilicon,hix5hd2-gemac", "hisilicon,hisi-gemac-v1";
>>
>> You can't just change compatible strings.
>>
> Okay, maybe I should name all the compatible string with the suffix "-gmac" instead of
> "-gemac". This can keep the compatible strings with the same suffix. Is this okay?
> Can I just add the generic compatible string without changing the SoCs compatible string?
> Like following:
>         gmac0: ethernet@f9840000 {
>  -              compatible = "hisilicon,hix5hd2-gmac";
>  +              compatible = "hisilicon,hix5hd2-gmac", "hisilicon,hisi-gmac-v1";

Yes, this is fine.

Rob

^ permalink raw reply

* Re: stmmac DT property snps,axi_all
From: Giuseppe CAVALLARO @ 2016-12-12 14:18 UTC (permalink / raw)
  To: Niklas Cassel, Alexandre Torgue; +Cc: netdev
In-Reply-To: <e0362693-4ae9-b3d6-3955-c72df7a1b0c0@axis.com>

Please Niklas

when you send the patch, add my

Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>


On 12/9/2016 10:53 AM, Niklas Cassel wrote:
> On 12/09/2016 10:20 AM, Niklas Cassel wrote:
>> On 12/08/2016 02:36 PM, Alexandre Torgue wrote:
>>> Hi Niklas,
>>>
>>> On 12/05/2016 05:18 PM, Niklas Cassel wrote:
>>>> Hello Giuseppe
>>>>
>>>>
>>>> I'm trying to figure out what snps,axi_all is supposed to represent.
>>>>
>>>> It appears that the value is saved, but never used in the code.
>>>>
>>>> Looking at the register specification, I'm guessing that it represents
>>>> Address-Aligned Beats, but there is already the property snps,aal
>>>> for that.
>>> IMO, it is not useful. Indeed AXI_AAL is a read only bit (in AXI bus mode register) and reflects the aal bit in DMA bus register.
>>> As you know we use "snps,aal" to set aal bit in DMA bus register.
>>> So "snps,axi_all" entry seems useless. Let's see with Peppe.
>> Ok, I see. GMAC and GMAC4 is different here.
>>
>> For GMAC4 AAL only exists in DMA_SYS_BUS_MODE.
>> It's not reflected anywhere else.
>>
>> The code is correct in the driver.
>>
>> If snps,axi_all is just created for a read-only register,
>> and it is currently never used in the code,
>> while we have snps,aal, which is correct and works,
>> I guess it should be ok to remove snps,axi_all.
>>
>> I can cook up a patch.
>>
>
> Here we go :)
>
> I will send it as a real patch once net-next reopens.
>
>
>>From defc01cb7c22611b89d9cf1fcae72544092bd62c Mon Sep 17 00:00:00 2001
> From: Niklas Cassel <niklas.cassel@axis.com>
> Date: Fri, 9 Dec 2016 10:27:00 +0100
> Subject: [PATCH net-next] net: stmmac: remove unused duplicate property
>  snps,axi_all
>
> For core revision 3.x Address-Aligned Beats is available in two registers.
> The DT property snps,aal was created for AAL in the DMA bus register,
> which is a read/write bit.
> The DT property snps,axi_all was created for AXI_AAL in the AXI bus mode
> register, which is a read only bit that reflects the value of AAL in the
> DMA bus register.
>
> Since the value of snps,axi_all is never used in the driver,
> and since the property was created for a bit that is read only,
> it should be safe to remove the property.
>
> Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
> ---
>  Documentation/devicetree/bindings/net/stmmac.txt      | 1 -
>  drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 -
>  include/linux/stmmac.h                                | 1 -
>  3 files changed, 3 deletions(-)
>
> diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt
> index 128da752fec9..c3d2fd480a1b 100644
> --- a/Documentation/devicetree/bindings/net/stmmac.txt
> +++ b/Documentation/devicetree/bindings/net/stmmac.txt
> @@ -65,7 +65,6 @@ Optional properties:
>      - snps,wr_osr_lmt: max write outstanding req. limit
>      - snps,rd_osr_lmt: max read outstanding req. limit
>      - snps,kbbe: do not cross 1KiB boundary.
> -    - snps,axi_all: align address
>      - snps,blen: this is a vector of supported burst length.
>      - snps,fb: fixed-burst
>      - snps,mb: mixed-burst
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
> index 082cd48db6a7..60ba8993c650 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
> @@ -121,7 +121,6 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
>      axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en");
>      axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm");
>      axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe");
> -    axi->axi_axi_all = of_property_read_bool(np, "snps,axi_all");
>      axi->axi_fb = of_property_read_bool(np, "snps,axi_fb");
>      axi->axi_mb = of_property_read_bool(np, "snps,axi_mb");
>      axi->axi_rb =  of_property_read_bool(np, "snps,axi_rb");
> diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
> index 266dab9ad782..889e0e9a3f1c 100644
> --- a/include/linux/stmmac.h
> +++ b/include/linux/stmmac.h
> @@ -103,7 +103,6 @@ struct stmmac_axi {
>      u32 axi_wr_osr_lmt;
>      u32 axi_rd_osr_lmt;
>      bool axi_kbbe;
> -    bool axi_axi_all;
>      u32 axi_blen[AXI_BLEN];
>      bool axi_fb;
>      bool axi_mb;
>

^ permalink raw reply

* Re: [PATCH net-next 1/3] net:dsa:mv88e6xxx: use hashtable to store multicast entries
From: Andrew Lunn @ 2016-12-12 14:18 UTC (permalink / raw)
  To: Volodymyr Bendiuga; +Cc: vivien.didelot, f.fainelli, netdev, volodymyr.bendiuga
In-Reply-To: <1481549958-1265-1-git-send-email-volodymyr.bendiuga@gmail.com>

Hi Volodymyr

Any multi patch patchset should include a cover note. See git
format-patch --cover. This should explain the big picture what does
the patchset do, why etc.

Also, David sent out an email saying net-next was closed. While it is
closed, please send patches are RFC.

Thanks
	Andrew

^ permalink raw reply

* Re: stmmac driver...
From: Giuseppe CAVALLARO @ 2016-12-12 14:17 UTC (permalink / raw)
  To: David Miller, alexandre.torgue; +Cc: netdev
In-Reply-To: <20161207.130654.1484281922991494182.davem@davemloft.net>

Hi David

On 12/7/2016 7:06 PM, David Miller wrote:
>
> Giuseppe and Alexandre,
>
> There are a lot of patches and discussions happening around the stammc
> driver lately and both of you are listed as the maintainers.
>
> I really need prompt and conclusive reviews of these patch submissions
> from you, and participation in all discussions about the driver.

yes we are trying to do the best.

> Otherwise I have only three things I can do: 1) let the patches rot in
> patchwork for days 2) trust that the patches are sane and fit your
> desires and goals and just apply them or 3) reject them since they
> aren't being reviewed properly.

at this stage, I think the best is: (3).

>
> Thanks in advance.
>
you are welcome


Peppe

^ permalink raw reply

* Re: Designing a safe RX-zero-copy Memory Model for Networking
From: Mike Rapoport @ 2016-12-12 14:14 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev@vger.kernel.org, linux-mm, John Fastabend,
	Willem de Bruijn, Björn Töpel, Karlsson, Magnus,
	Alexander Duyck, Mel Gorman, Tom Herbert, Brenden Blanco,
	Tariq Toukan, Saeed Mahameed, Jesse Brandeburg, Kalman Meth
In-Reply-To: <20161212104042.0a011212@redhat.com>

On Mon, Dec 12, 2016 at 10:40:42AM +0100, Jesper Dangaard Brouer wrote:
> 
> On Mon, 12 Dec 2016 10:38:13 +0200 Mike Rapoport <rppt@linux.vnet.ibm.com> wrote:
> 
> > Hello Jesper,
> > 
> > On Mon, Dec 05, 2016 at 03:31:32PM +0100, Jesper Dangaard Brouer wrote:
> > > Hi all,
> > > 
> > > This is my design for how to safely handle RX zero-copy in the network
> > > stack, by using page_pool[1] and modifying NIC drivers.  Safely means
> > > not leaking kernel info in pages mapped to userspace and resilience
> > > so a malicious userspace app cannot crash the kernel.
> > > 
> > > Design target
> > > =============
> > > 
> > > Allow the NIC to function as a normal Linux NIC and be shared in a
> > > safe manor, between the kernel network stack and an accelerated
> > > userspace application using RX zero-copy delivery.
> > > 
> > > Target is to provide the basis for building RX zero-copy solutions in
> > > a memory safe manor.  An efficient communication channel for userspace
> > > delivery is out of scope for this document, but OOM considerations are
> > > discussed below (`Userspace delivery and OOM`_).  
> > 
> > Sorry, if this reply is a bit off-topic.
> 
> It is very much on topic IMHO :-)
> 
> > I'm working on implementation of RX zero-copy for virtio and I've dedicated
> > some thought about making guest memory available for physical NIC DMAs.
> > I believe this is quite related to your page_pool proposal, at least from
> > the NIC driver perspective, so I'd like to share some thoughts here.
> 
> Seems quite related. I'm very interested in cooperating with you! I'm
> not very familiar with virtio, and how packets/pages gets channeled
> into virtio.

They are copied :-)
Presuming we are dealing only with vhost backend, the received skb
eventually gets converted to IOVs, which in turn are copied to the guest
memory. The IOVs point to the guest memory that is allocated by virtio-net
running in the guest.

> > The idea is to dedicate one (or more) of the NIC's queues to a VM, e.g.
> > using macvtap, and then propagate guest RX memory allocations to the NIC
> > using something like new .ndo_set_rx_buffers method.
> 
> I believe the page_pool API/design aligns with this idea/use-case.
> 
> > What is your view about interface between the page_pool and the NIC
> > drivers?
> 
> In my Prove-of-Concept implementation, the NIC driver (mlx5) register
> a page_pool per RX queue.  This is done for two reasons (1) performance
> and (2) for supporting use-cases where only one single RX-ring queue is
> (re)configured to support RX-zero-copy.  There are some associated
> extra cost of enabling this mode, thus it makes sense to only enable it
> when needed.
> 
> I've not decided how this gets enabled, maybe some new driver NDO.  It
> could also happen when a XDP program gets loaded, which request this
> feature.
> 
> The macvtap solution is nice and we should support it, but it requires
> VM to have their MAC-addr registered on the physical switch.  This
> design is about adding flexibility. Registering an XDP eBPF filter
> provides the maximum flexibility for matching the destination VM.

I'm not very familiar with XDP eBPF, and it's difficult for me to estimate
what needs to be done in BPF program to do proper conversion of skb to the
virtio descriptors.

We were not considered using XDP yet, so we've decided to limit the initial
implementation to macvtap because we can ensure correspondence between a
NIC queue and virtual NIC, which is not the case with more generic tap
device. It could be that use of XDP will allow for a generic solution for
virtio case as well.
 
> 
> > Have you considered using "push" model for setting the NIC's RX memory?
> 
> I don't understand what you mean by a "push" model?

Currently, memory allocation in NIC drivers boils down to alloc_page with
some wrapping code. I see two possible ways to make NIC use of some
preallocated pages: either NIC driver will call an API (probably different
from alloc_page) to obtain that memory, or there will be NDO API that
allows to set the NIC's RX buffers. I named the later case "push".
 

^ permalink raw reply

* Re: Synopsys Ethernet QoS
From: Giuseppe CAVALLARO @ 2016-12-12 14:11 UTC (permalink / raw)
  To: Joao Pinto, Florian Fainelli, Andy Shevchenko
  Cc: David Miller, lars.persson, rabin.vincent, netdev,
	CARLOS.PALMINHA, Jie.Deng1
In-Reply-To: <f5357ea2-a295-ab08-046e-f8b8f6ca4344@synopsys.com>

Hello All.

On 12/12/2016 11:19 AM, Joao Pinto wrote:
> Hi,
>
> Às 1:44 AM de 12/10/2016, Florian Fainelli escreveu:
>> Le 12/09/16 à 16:16, Andy Shevchenko a écrit :
>>> On Sat, Dec 10, 2016 at 12:52 AM, Florian Fainelli <f.fainelli@gmail.com> wrote:
>>>
>>>> It's kind of sad that customers of that IP (stmmac, amd-xgbe, sxgbe)
>>>
>>>> did
>>>> actually pioneer the upstreaming effort, but it is good to see people
>>>> from Synopsys willing to fix that in the future.
>>>
>>> Wait, you would like to tell that we have more than 2 drivers for the
>>> same (okay, same vendor) IP?!
>>> It's better to unify them earlier, than have n+ copies.
>>
>> Unfortunately that is the case, see this email:
>>
>> https://www.mail-archive.com/netdev@vger.kernel.org/msg142796.html
>>
>> dwc_eth_qos and stmmac have some overlap. There seems to be work
>> underway to unify these two to begin with.
>>
>>>
>>> P.S. Though, I don't see how sxgbe got in the list. First glance on
>>> the code doesn't show similarities.
>>
>> Well samsung/sxgbe looks potentially similar to amd/xgbe, but that's
>> just my cursory look at the code, it may very well be something entirely
>> different. The descriptor formats just look suspiciously similar.
>>
>
> Thank you for your inputs! Renaming seems to be a hotspot. I agree that maybe
> instead of renaming (breaking retro-compatibility as David and Florian
> mentioned), the best is to move stmmac to synopsys/ after merging *qos* and
> removing it. As Florian mentioned, git is capable of detecting folder restructured.
>
> @Rabin Vincent: Hi Rabin. Since Axis is more familiar with the synopsys/*qos*
> driver would it be possible for you to make an initial analysis of what has to
> be merged into Stmmac? This way the development would speed-up.

my priority on this topic was to help Joao to easily port his
platform on stmmac and IIUC we already achieved some results.
For several reasons, we had discussed to support GMAC IP with stmmac
and, as first stage, to evaluate and port the relevant changes
from  dwc_eth_qos.c. If renaming of moving the stmmac should be
discussed later and for sure all together could find the best solution
in the right moment where some critical patches and supports are in
place.

As highlighted by Jie, the XLGMAC is another IP (that I do not know
at all so I cannot say if there is some way to support it
by using stmmac).

Thanks a lot.

peppe

>
> Thanks to all.
>
> Joao
>

^ permalink raw reply

* Re: [PATCH net-next 2/3] net:dsa:mv88e6xxx: add helper functions to operate on hashtable
From: Andrew Lunn @ 2016-12-12 14:03 UTC (permalink / raw)
  To: Volodymyr Bendiuga; +Cc: vivien.didelot, f.fainelli, netdev, volodymyr.bendiuga
In-Reply-To: <1481550031-3227-1-git-send-email-volodymyr.bendiuga@gmail.com>

On Mon, Dec 12, 2016 at 02:40:31PM +0100, Volodymyr Bendiuga wrote:
> This implementation is similar to rocker driver: drivers/net/ethernet/rocker_ofdpa.c
> 
> mv88e6xxx_pvec_tbl_find - iterates through entries in the hashtable
> and looks for a match.
> 
> mv88e6xxx_pvec_tbl_get - returns en entry if it is found in the
> hashtable
> 
> mv88e6xxx_pvec_tbl_update - updates the hashtable: inserts new entry,
> deletes/modifies existing one.
> 
> Signed-off-by: Volodymyr Bendiuga <volodymyr.bendiuga@gmail.com>

Hi Volodymyr

It looks like your white space is all messed up. Please use checkpatch.

> +
> +static int mv88e6xxx_pvec_tbl_update(struct mv88e6xxx_chip *chip, u16 fid,
> +			      const unsigned char *addr, u16 pvec)
> +{
> +        struct pvec_tbl_entry *obj;
> +        struct pvec_tbl_entry *found;
> +        bool remove = pvec ? false : true;
> +
> +        obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +        if (!obj)
> +                return -ENOMEM;

So we need to look at the memory model here.

Currently the driver is stateless. This now introduces state. That
means we need to look at the prepare calls switchdev has, since we are
only allowed to fail in the prepare call. You need to allocate the
memory in the port_fdb_prepare() and then use the memory in
port_fdb_add() etc.

I don't think your third patch does any of this.

  Andrew

^ permalink raw reply

* Re: [PATCH net-next 1/3] net:dsa:mv88e6xxx: use hashtable to store multicast entries
From: Andrew Lunn @ 2016-12-12 13:54 UTC (permalink / raw)
  To: Volodymyr Bendiuga; +Cc: vivien.didelot, f.fainelli, netdev, volodymyr.bendiuga
In-Reply-To: <1481549958-1265-1-git-send-email-volodymyr.bendiuga@gmail.com>

On Mon, Dec 12, 2016 at 02:39:18PM +0100, Volodymyr Bendiuga wrote:
> Hashtable will make it extremely faster when inserting fdb entries
> into the forwarding database.
> 
> Signed-off-by: Volodymyr Bendiuga <volodymyr.bendiuga@gmail.com>
> ---
>  drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
> index 431e954..407e6db 100644
> --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
> +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
> @@ -15,6 +15,8 @@
>  #include <linux/if_vlan.h>
>  #include <linux/irq.h>
>  #include <linux/gpio/consumer.h>
> +#include <linux/crc32.h>
> +#include <linux/hashtable.h>
>  
>  #ifndef UINT64_MAX
>  #define UINT64_MAX		(u64)(~((u64)0))
> @@ -672,6 +674,16 @@ struct mv88e6xxx_info {
>  	const struct mv88e6xxx_ops *ops;
>  };
>  
> +struct pvec_tbl_entry {

Please use the mv88e6xxx_ prefix.

       Andrew

^ permalink raw reply

* [PATCH net-next 3/3] net:dsa:mv88e6xxx: use hashtable instead of atu_get
From: Volodymyr Bendiuga @ 2016-12-12 13:41 UTC (permalink / raw)
  To: andrew, vivien.didelot, f.fainelli, netdev, volodymyr.bendiuga

mv88e6xxx_atu_get() is quite slow, and therefore can be
replaced with hashtable helper functions that are much
faster when inserting entries into the database, or searching
for them. The logics in mv88e6xxx_db_load_purte() is similar to
previous implementation, but hashtable helper functions
are used instead of mv88e6xxx_atu_get().

Signed-off-by: Volodymyr Bendiuga <volodymyr.bendiuga@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 63 +++++++++++-----------------------------
 1 file changed, 17 insertions(+), 46 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 6597f3f..6032c5b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2069,42 +2069,6 @@ static u16 mv88e6xxx_pvec_tbl_get(struct mv88e6xxx_chip *chip,
         return 0;
 }
 
-static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid,
-				  struct mv88e6xxx_atu_entry *entry);
-
-static int mv88e6xxx_atu_get(struct mv88e6xxx_chip *chip, int fid,
-			     const u8 *addr, struct mv88e6xxx_atu_entry *entry)
-{
-	struct mv88e6xxx_atu_entry next;
-	int err;
-
-	eth_broadcast_addr(next.mac);
-
-	err = _mv88e6xxx_atu_mac_write(chip, next.mac);
-	if (err)
-		return err;
-
-	do {
-		err = _mv88e6xxx_atu_getnext(chip, fid, &next);
-		if (err)
-			return err;
-
-		if (next.state == GLOBAL_ATU_DATA_STATE_UNUSED)
-			break;
-
-		if (ether_addr_equal(next.mac, addr)) {
-			*entry = next;
-			return 0;
-		}
-	} while (!is_broadcast_ether_addr(next.mac));
-
-	memset(entry, 0, sizeof(*entry));
-	entry->fid = fid;
-	ether_addr_copy(entry->mac, addr);
-
-	return 0;
-}
-
 static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
 					const unsigned char *addr, u16 vid,
 					u8 state)
@@ -2121,18 +2085,25 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
 	if (err)
 		return err;
 
-	err = mv88e6xxx_atu_get(chip, vlan.fid, addr, &entry);
-	if (err)
-		return err;
+	entry.fid = vlan.fid;
+	entry.state = state;
+	ether_addr_copy(entry.mac, addr);
+
+	/* multicast entries can use several ports for each address */
+	if (is_multicast_ether_addr(addr)) {
+		entry.portv_trunkid = mv88e6xxx_pvec_tbl_get(chip, entry.fid, addr);
+		if (state == GLOBAL_ATU_DATA_STATE_UNUSED)
+			entry.portv_trunkid &= ~BIT(port);
+		else
+			entry.portv_trunkid |= BIT(port);
 
-	/* Purge the ATU entry only if no port is using it anymore */
-	if (state == GLOBAL_ATU_DATA_STATE_UNUSED) {
-		entry.portv_trunkid &= ~BIT(port);
-		if (!entry.portv_trunkid)
-			entry.state = GLOBAL_ATU_DATA_STATE_UNUSED;
+		mv88e6xxx_pvec_tbl_update(chip, entry.fid, addr, entry.portv_trunkid);
+		entry.trunk = false;
 	} else {
-		entry.portv_trunkid |= BIT(port);
-		entry.state = state;
+		if (state != GLOBAL_ATU_DATA_STATE_UNUSED) {
+			entry.trunk = false;
+			entry.portv_trunkid = BIT(port);
+		}
 	}
 
 	return _mv88e6xxx_atu_load(chip, &entry);
-- 
2.7.4

^ permalink raw reply related

* [PATCH 2/2] encx24j600: Fix some checkstyle warnings
From: jeroen.de_wachter.ext @ 2016-12-12 13:29 UTC (permalink / raw)
  To: David Miller, Jon Ringle; +Cc: Andrew Morton, netdev, Jeroen De Wachter
In-Reply-To: <1481549349-8199-1-git-send-email-jeroen.de_wachter.ext@nokia.com>

From: Jeroen De Wachter <jeroen.de_wachter.ext@nokia.com>

Signed-off-by: Jeroen De Wachter <jeroen.de_wachter.ext@nokia.com>
---
 drivers/net/ethernet/microchip/encx24j600-regmap.c | 17 +++++++++++------
 drivers/net/ethernet/microchip/encx24j600.c        | 16 ++++++++++++++--
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c
index f3bb905..44bb04d 100644
--- a/drivers/net/ethernet/microchip/encx24j600-regmap.c
+++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c
@@ -26,11 +26,11 @@ static inline bool is_bits_set(int value, int mask)
 }
 
 static int encx24j600_switch_bank(struct encx24j600_context *ctx,
-					 int bank)
+				  int bank)
 {
 	int ret = 0;
-
 	int bank_opcode = BANK_SELECT(bank);
+
 	ret = spi_write(ctx->spi, &bank_opcode, 1);
 	if (ret == 0)
 		ctx->bank = bank;
@@ -39,7 +39,7 @@ static int encx24j600_switch_bank(struct encx24j600_context *ctx,
 }
 
 static int encx24j600_cmdn(struct encx24j600_context *ctx, u8 opcode,
-			    const void *buf, size_t len)
+			   const void *buf, size_t len)
 {
 	struct spi_message m;
 	struct spi_transfer t[2] = { { .tx_buf = &opcode, .len = 1, },
@@ -54,12 +54,14 @@ static int encx24j600_cmdn(struct encx24j600_context *ctx, u8 opcode,
 static void regmap_lock_mutex(void *context)
 {
 	struct encx24j600_context *ctx = context;
+
 	mutex_lock(&ctx->mutex);
 }
 
 static void regmap_unlock_mutex(void *context)
 {
 	struct encx24j600_context *ctx = context;
+
 	mutex_unlock(&ctx->mutex);
 }
 
@@ -128,6 +130,7 @@ static int regmap_encx24j600_sfr_update(struct encx24j600_context *ctx,
 
 	if (reg < 0x80) {
 		int ret = 0;
+
 		cmd = banked_code | banked_reg;
 		if ((banked_reg < 0x16) && (ctx->bank != bank))
 			ret = encx24j600_switch_bank(ctx, bank);
@@ -174,6 +177,7 @@ static int regmap_encx24j600_sfr_write(void *context, u8 reg, u8 *val,
 				       size_t len)
 {
 	struct encx24j600_context *ctx = context;
+
 	return regmap_encx24j600_sfr_update(ctx, reg, val, len, WCRU, WCRCODE);
 }
 
@@ -228,9 +232,9 @@ int regmap_encx24j600_spi_write(void *context, u8 reg, const u8 *data,
 
 	if (reg < 0xc0)
 		return encx24j600_cmdn(ctx, reg, data, count);
-	else
-		/* SPI 1-byte command. Ignore data */
-		return spi_write(ctx->spi, &reg, 1);
+
+	/* SPI 1-byte command. Ignore data */
+	return spi_write(ctx->spi, &reg, 1);
 }
 EXPORT_SYMBOL_GPL(regmap_encx24j600_spi_write);
 
@@ -495,6 +499,7 @@ static bool encx24j600_phymap_volatile(struct device *dev, unsigned int reg)
 	.writeable_reg = encx24j600_phymap_writeable,
 	.volatile_reg = encx24j600_phymap_volatile,
 };
+
 static struct regmap_bus phymap_encx24j600 = {
 	.reg_write = regmap_encx24j600_phy_reg_write,
 	.reg_read = regmap_encx24j600_phy_reg_read,
diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index 5251aa3..fbce616 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -30,7 +30,7 @@
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 static int debug = -1;
-module_param(debug, int, 0);
+module_param(debug, int, 0000);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
 /* SRAM memory layout:
@@ -105,6 +105,7 @@ static u16 encx24j600_read_reg(struct encx24j600_priv *priv, u8 reg)
 	struct net_device *dev = priv->ndev;
 	unsigned int val = 0;
 	int ret = regmap_read(priv->ctx.regmap, reg, &val);
+
 	if (unlikely(ret))
 		netif_err(priv, drv, dev, "%s: error %d reading reg %02x\n",
 			  __func__, ret, reg);
@@ -115,6 +116,7 @@ static void encx24j600_write_reg(struct encx24j600_priv *priv, u8 reg, u16 val)
 {
 	struct net_device *dev = priv->ndev;
 	int ret = regmap_write(priv->ctx.regmap, reg, val);
+
 	if (unlikely(ret))
 		netif_err(priv, drv, dev, "%s: error %d writing reg %02x=%04x\n",
 			  __func__, ret, reg, val);
@@ -125,6 +127,7 @@ static void encx24j600_update_reg(struct encx24j600_priv *priv, u8 reg,
 {
 	struct net_device *dev = priv->ndev;
 	int ret = regmap_update_bits(priv->ctx.regmap, reg, mask, val);
+
 	if (unlikely(ret))
 		netif_err(priv, drv, dev, "%s: error %d updating reg %02x=%04x~%04x\n",
 			  __func__, ret, reg, val, mask);
@@ -135,6 +138,7 @@ static u16 encx24j600_read_phy(struct encx24j600_priv *priv, u8 reg)
 	struct net_device *dev = priv->ndev;
 	unsigned int val = 0;
 	int ret = regmap_read(priv->ctx.phymap, reg, &val);
+
 	if (unlikely(ret))
 		netif_err(priv, drv, dev, "%s: error %d reading %02x\n",
 			  __func__, ret, reg);
@@ -145,6 +149,7 @@ static void encx24j600_write_phy(struct encx24j600_priv *priv, u8 reg, u16 val)
 {
 	struct net_device *dev = priv->ndev;
 	int ret = regmap_write(priv->ctx.phymap, reg, val);
+
 	if (unlikely(ret))
 		netif_err(priv, drv, dev, "%s: error %d writing reg %02x=%04x\n",
 			  __func__, ret, reg, val);
@@ -164,6 +169,7 @@ static void encx24j600_cmd(struct encx24j600_priv *priv, u8 cmd)
 {
 	struct net_device *dev = priv->ndev;
 	int ret = regmap_write(priv->ctx.regmap, cmd, 0);
+
 	if (unlikely(ret))
 		netif_err(priv, drv, dev, "%s: error %d with cmd %02x\n",
 			  __func__, ret, cmd);
@@ -173,6 +179,7 @@ static int encx24j600_raw_read(struct encx24j600_priv *priv, u8 reg, u8 *data,
 			       size_t count)
 {
 	int ret;
+
 	mutex_lock(&priv->ctx.mutex);
 	ret = regmap_encx24j600_spi_read(&priv->ctx, reg, data, count);
 	mutex_unlock(&priv->ctx.mutex);
@@ -184,6 +191,7 @@ static int encx24j600_raw_write(struct encx24j600_priv *priv, u8 reg,
 				const u8 *data, size_t count)
 {
 	int ret;
+
 	mutex_lock(&priv->ctx.mutex);
 	ret = regmap_encx24j600_spi_write(&priv->ctx, reg, data, count);
 	mutex_unlock(&priv->ctx.mutex);
@@ -194,6 +202,7 @@ static int encx24j600_raw_write(struct encx24j600_priv *priv, u8 reg,
 static void encx24j600_update_phcon1(struct encx24j600_priv *priv)
 {
 	u16 phcon1 = encx24j600_read_phy(priv, PHCON1);
+
 	if (priv->autoneg == AUTONEG_ENABLE) {
 		phcon1 |= ANEN | RENEG;
 	} else {
@@ -328,6 +337,7 @@ static int encx24j600_receive_packet(struct encx24j600_priv *priv,
 {
 	struct net_device *dev = priv->ndev;
 	struct sk_buff *skb = netdev_alloc_skb(dev, rsv->len + NET_IP_ALIGN);
+
 	if (!skb) {
 		pr_err_ratelimited("RX: OOM: packet dropped\n");
 		dev->stats.rx_dropped++;
@@ -828,6 +838,7 @@ static void encx24j600_set_multicast_list(struct net_device *dev)
 static void encx24j600_hw_tx(struct encx24j600_priv *priv)
 {
 	struct net_device *dev = priv->ndev;
+
 	netif_info(priv, tx_queued, dev, "TX Packet Len:%d\n",
 		   priv->tx_skb->len);
 
@@ -895,7 +906,6 @@ static void encx24j600_tx_timeout(struct net_device *dev)
 
 	dev->stats.tx_errors++;
 	netif_wake_queue(dev);
-	return;
 }
 
 static int encx24j600_get_regs_len(struct net_device *dev)
@@ -958,12 +968,14 @@ static int encx24j600_set_settings(struct net_device *dev,
 static u32 encx24j600_get_msglevel(struct net_device *dev)
 {
 	struct encx24j600_priv *priv = netdev_priv(dev);
+
 	return priv->msg_enable;
 }
 
 static void encx24j600_set_msglevel(struct net_device *dev, u32 val)
 {
 	struct encx24j600_priv *priv = netdev_priv(dev);
+
 	priv->msg_enable = val;
 }
 
-- 
1.8.3.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox