Netdev List
 help / color / mirror / Atom feed
* [PATCH v3 03/14] NFC: nxp-nci: Get rid of platform data
From: Andy Shevchenko @ 2019-07-25 19:35 UTC (permalink / raw)
  To: Clément Perrochaud, Charles Gorand, netdev, David S. Miller,
	Sedat Dilek
  Cc: Andy Shevchenko, Sedat Dilek
In-Reply-To: <20190725193511.64274-1-andriy.shevchenko@linux.intel.com>

Legacy platform data must go away. We are on the safe side here since
there are no users of it in the kernel.

If anyone by any odd reason needs it the GPIO lookup tables and
built-in device properties at your service.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
---
 MAINTAINERS                           |  1 -
 drivers/nfc/nxp-nci/core.c            |  1 -
 drivers/nfc/nxp-nci/i2c.c             |  9 +--------
 drivers/nfc/nxp-nci/nxp-nci.h         |  1 -
 include/linux/platform_data/nxp-nci.h | 19 -------------------
 5 files changed, 1 insertion(+), 30 deletions(-)
 delete mode 100644 include/linux/platform_data/nxp-nci.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 711b5d07f73d..e54e19f2c96d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11351,7 +11351,6 @@ F:	include/net/nfc/
 F:	include/uapi/linux/nfc.h
 F:	drivers/nfc/
 F:	include/linux/platform_data/nfcmrvl.h
-F:	include/linux/platform_data/nxp-nci.h
 F:	Documentation/devicetree/bindings/net/nfc/
 
 NFS, SUNRPC, AND LOCKD CLIENTS
diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c
index 8dafc696719f..aed18ca60170 100644
--- a/drivers/nfc/nxp-nci/core.c
+++ b/drivers/nfc/nxp-nci/core.c
@@ -14,7 +14,6 @@
 #include <linux/gpio.h>
 #include <linux/module.h>
 #include <linux/nfc.h>
-#include <linux/platform_data/nxp-nci.h>
 
 #include <net/nfc/nci_core.h>
 
diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c
index 5db71869f04b..47b3b7e612e6 100644
--- a/drivers/nfc/nxp-nci/i2c.c
+++ b/drivers/nfc/nxp-nci/i2c.c
@@ -23,7 +23,6 @@
 #include <linux/gpio/consumer.h>
 #include <linux/of_gpio.h>
 #include <linux/of_irq.h>
-#include <linux/platform_data/nxp-nci.h>
 #include <asm/unaligned.h>
 
 #include <net/nfc/nfc.h>
@@ -304,7 +303,6 @@ static int nxp_nci_i2c_probe(struct i2c_client *client,
 			    const struct i2c_device_id *id)
 {
 	struct nxp_nci_i2c_phy *phy;
-	struct nxp_nci_nfc_platform_data *pdata;
 	int r;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
@@ -323,17 +321,12 @@ static int nxp_nci_i2c_probe(struct i2c_client *client,
 	phy->i2c_dev = client;
 	i2c_set_clientdata(client, phy);
 
-	pdata = client->dev.platform_data;
-
-	if (!pdata && client->dev.of_node) {
+	if (client->dev.of_node) {
 		r = nxp_nci_i2c_parse_devtree(client);
 		if (r < 0) {
 			nfc_err(&client->dev, "Failed to get DT data\n");
 			goto probe_exit;
 		}
-	} else if (pdata) {
-		phy->gpio_en = pdata->gpio_en;
-		phy->gpio_fw = pdata->gpio_fw;
 	} else if (ACPI_HANDLE(&client->dev)) {
 		r = nxp_nci_i2c_acpi_config(phy);
 		if (r < 0)
diff --git a/drivers/nfc/nxp-nci/nxp-nci.h b/drivers/nfc/nxp-nci/nxp-nci.h
index 6fe7c45544bf..ae3fb2735a4e 100644
--- a/drivers/nfc/nxp-nci/nxp-nci.h
+++ b/drivers/nfc/nxp-nci/nxp-nci.h
@@ -14,7 +14,6 @@
 #include <linux/completion.h>
 #include <linux/firmware.h>
 #include <linux/nfc.h>
-#include <linux/platform_data/nxp-nci.h>
 
 #include <net/nfc/nci_core.h>
 
diff --git a/include/linux/platform_data/nxp-nci.h b/include/linux/platform_data/nxp-nci.h
deleted file mode 100644
index 97827ad468e2..000000000000
--- a/include/linux/platform_data/nxp-nci.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Generic platform data for the NXP NCI NFC chips.
- *
- * Copyright (C) 2014  NXP Semiconductors  All rights reserved.
- *
- * Authors: Clément Perrochaud <clement.perrochaud@nxp.com>
- */
-
-#ifndef _NXP_NCI_H_
-#define _NXP_NCI_H_
-
-struct nxp_nci_nfc_platform_data {
-	unsigned int gpio_en;
-	unsigned int gpio_fw;
-	unsigned int irq;
-};
-
-#endif /* _NXP_NCI_H_ */
-- 
2.20.1


^ permalink raw reply related

* [PATCH v3 05/14] NFC: nxp-nci: Add GPIO ACPI mapping table
From: Andy Shevchenko @ 2019-07-25 19:35 UTC (permalink / raw)
  To: Clément Perrochaud, Charles Gorand, netdev, David S. Miller,
	Sedat Dilek
  Cc: Andy Shevchenko, Sedat Dilek
In-Reply-To: <20190725193511.64274-1-andriy.shevchenko@linux.intel.com>

In order to unify GPIO resource request prepare gpiod_get_index()
to behave correctly when there is no mapping provided by firmware.

Here we add explicit mapping between _CRS GpioIo() resources and
their names used in the driver.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
---
 drivers/nfc/nxp-nci/i2c.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c
index 713c267acf88..7344405feddf 100644
--- a/drivers/nfc/nxp-nci/i2c.c
+++ b/drivers/nfc/nxp-nci/i2c.c
@@ -247,6 +247,15 @@ static irqreturn_t nxp_nci_i2c_irq_thread_fn(int irq, void *phy_id)
 	return IRQ_NONE;
 }
 
+static const struct acpi_gpio_params firmware_gpios = { 1, 0, false };
+static const struct acpi_gpio_params enable_gpios = { 2, 0, false };
+
+static const struct acpi_gpio_mapping acpi_nxp_nci_gpios[] = {
+	{ "enable-gpios", &enable_gpios, 1 },
+	{ "firmware-gpios", &firmware_gpios, 1 },
+	{ }
+};
+
 static int nxp_nci_i2c_parse_devtree(struct i2c_client *client)
 {
 	struct nxp_nci_i2c_phy *phy = i2c_get_clientdata(client);
@@ -269,9 +278,14 @@ static int nxp_nci_i2c_parse_devtree(struct i2c_client *client)
 static int nxp_nci_i2c_acpi_config(struct nxp_nci_i2c_phy *phy)
 {
 	struct i2c_client *client = phy->i2c_dev;
+	int r;
 
-	phy->gpiod_en = devm_gpiod_get_index(&client->dev, NULL, 2, GPIOD_OUT_LOW);
-	phy->gpiod_fw = devm_gpiod_get_index(&client->dev, NULL, 1, GPIOD_OUT_LOW);
+	r = devm_acpi_dev_add_driver_gpios(&client->dev, acpi_nxp_nci_gpios);
+	if (r)
+		return r;
+
+	phy->gpiod_en = devm_gpiod_get(&client->dev, "enable", GPIOD_OUT_LOW);
+	phy->gpiod_fw = devm_gpiod_get(&client->dev, "firmware", GPIOD_OUT_LOW);
 
 	if (IS_ERR(phy->gpiod_en) || IS_ERR(phy->gpiod_fw)) {
 		nfc_err(&client->dev, "No GPIOs\n");
-- 
2.20.1


^ permalink raw reply related

* RE: [EXT] Re: [PATCH v6 rdma-next 5/6] RDMA/qedr: Add doorbell overflow recovery support
From: Michal Kalderon @ 2019-07-25 19:38 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Ariel Elior, dledford@redhat.com, galpress@amazon.com,
	linux-rdma@vger.kernel.org, davem@davemloft.net,
	netdev@vger.kernel.org
In-Reply-To: <20190725180106.GB18757@ziepe.ca>

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Thursday, July 25, 2019 9:01 PM
> 
> External Email
> 
> ----------------------------------------------------------------------
> On Tue, Jul 09, 2019 at 05:17:34PM +0300, Michal Kalderon wrote:
> 
> > +static int qedr_init_user_db_rec(struct ib_udata *udata,
> > +				 struct qedr_dev *dev, struct qedr_userq *q,
> > +				 bool requires_db_rec)
> > +{
> > +	struct qedr_ucontext *uctx =
> > +		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
> > +					  ibucontext);
> > +
> > +	/* Aborting for non doorbell userqueue (SRQ) or non-supporting lib
> */
> > +	if (requires_db_rec == 0 || !uctx->db_rec)
> > +		return 0;
> > +
> > +	/* Allocate a page for doorbell recovery, add to mmap ) */
> > +	q->db_rec_data = (void *)get_zeroed_page(GFP_KERNEL);
> 
> I now think this needs to be GFP_USER and our other drivers have a bug here
> as well..
Ok, will fix this.

> 
> Jason

^ permalink raw reply

* RE: [EXT] Re: [PATCH v6 rdma-next 0/6] RDMA/qedr: Use the doorbell overflow recovery mechanism for RDMA
From: Michal Kalderon @ 2019-07-25 19:40 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Ariel Elior, dledford@redhat.com, galpress@amazon.com,
	linux-rdma@vger.kernel.org, davem@davemloft.net,
	netdev@vger.kernel.org, Bernard Metzler
In-Reply-To: <20190725180148.GA20288@ziepe.ca>

> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Thursday, July 25, 2019 9:02 PM
> 
> External Email
> 
> ----------------------------------------------------------------------
> On Tue, Jul 09, 2019 at 05:17:29PM +0300, Michal Kalderon wrote:
> > This patch series uses the doorbell overflow recovery mechanism
> > introduced in commit 36907cd5cd72 ("qed: Add doorbell overflow
> > recovery mechanism") for rdma ( RoCE and iWARP )
> >
> > The first three patches modify the core code to contain helper
> > functions for managing mmap_xa inserting, getting and freeing entries.
> > The code was taken almost as is from the efa driver.
> > There is still an open discussion on whether we should take this even
> > further and make the entire mmap generic. Until a decision is made, I
> > only created the database API and modified the efa and qedr driver to
> > use it. The doorbell recovery code will be based on the common code.
> >
> > Efa driver was compile tested only.
> >
> > rdma-core pull request #493
> >
> > Changes from V5:
> > - Switch between driver dealloc_ucontext and mmap_entries_remove.
> > - No need to verify the key after using the key to load an entry from
> >   the mmap_xa.
> > - Change mmap_free api to pass an 'entry' object.
> > - Add documentation for mmap_free and for newly exported functions.
> > - Fix some extra/missing line breaks.
> 
> Lets do SIW now as well, it has the same xa scheme copied from EFA
ok
> 
> Thanks,
> Jason

^ permalink raw reply

* [PATCH] sis900: add support for ethtool's EEPROM dump
From: Sergej Benilov @ 2019-07-25 19:48 UTC (permalink / raw)
  To: venza, netdev, andrew; +Cc: Sergej Benilov

Implement ethtool's EEPROM dump command (ethtool -e|--eeprom-dump).

Thx to Andrew Lunn for comments.

Signed-off-by: Sergej Benilov <sergej.benilov@googlemail.com>
---
 drivers/net/ethernet/sis/sis900.c | 68 +++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 6e07f5ebacfc..85eaccbbbac1 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -191,6 +191,8 @@ struct sis900_private {
 	unsigned int tx_full; /* The Tx queue is full. */
 	u8 host_bridge_rev;
 	u8 chipset_rev;
+	/* EEPROM data */
+	int eeprom_size;
 };
 
 MODULE_AUTHOR("Jim Huang <cmhuang@sis.com.tw>, Ollie Lho <ollie@sis.com.tw>");
@@ -475,6 +477,8 @@ static int sis900_probe(struct pci_dev *pci_dev,
 	sis_priv->pci_dev = pci_dev;
 	spin_lock_init(&sis_priv->lock);
 
+	sis_priv->eeprom_size = 24;
+
 	pci_set_drvdata(pci_dev, net_dev);
 
 	ring_space = pci_alloc_consistent(pci_dev, TX_TOTAL_SIZE, &ring_dma);
@@ -2122,6 +2126,68 @@ static void sis900_get_wol(struct net_device *net_dev, struct ethtool_wolinfo *w
 	wol->supported = (WAKE_PHY | WAKE_MAGIC);
 }
 
+static int sis900_get_eeprom_len(struct net_device *dev)
+{
+	struct sis900_private *sis_priv = netdev_priv(dev);
+
+	return sis_priv->eeprom_size;
+}
+
+static int sis900_read_eeprom(struct net_device *net_dev, u8 *buf)
+{
+	struct sis900_private *sis_priv = netdev_priv(net_dev);
+	void __iomem *ioaddr = sis_priv->ioaddr;
+	int wait, ret = -EAGAIN;
+	u16 signature;
+	u16 *ebuf = (u16 *)buf;
+	int i;
+
+	if (sis_priv->chipset_rev == SIS96x_900_REV) {
+		sw32(mear, EEREQ);
+		for (wait = 0; wait < 2000; wait++) {
+			if (sr32(mear) & EEGNT) {
+				/* read 16 bits, and index by 16 bits */
+				for (i = 0; i < sis_priv->eeprom_size / 2; i++)
+					ebuf[i] = (u16)read_eeprom(ioaddr, i);
+				ret = 0;
+				break;
+			}
+			udelay(1);
+		}
+		sw32(mear, EEDONE);
+	} else {
+		signature = (u16)read_eeprom(ioaddr, EEPROMSignature);
+		if (signature != 0xffff && signature != 0x0000) {
+			/* read 16 bits, and index by 16 bits */
+			for (i = 0; i < sis_priv->eeprom_size / 2; i++)
+				ebuf[i] = (u16)read_eeprom(ioaddr, i);
+			ret = 0;
+		}
+	}
+	return ret;
+}
+
+#define SIS900_EEPROM_MAGIC	0xBABE
+static int sis900_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct sis900_private *sis_priv = netdev_priv(dev);
+	u8 *eebuf;
+	int res;
+
+	eebuf = kmalloc(sis_priv->eeprom_size, GFP_KERNEL);
+	if (!eebuf)
+		return -ENOMEM;
+
+	eeprom->magic = SIS900_EEPROM_MAGIC;
+	spin_lock_irq(&sis_priv->lock);
+	res = sis900_read_eeprom(dev, eebuf);
+	spin_unlock_irq(&sis_priv->lock);
+	if (!res)
+		memcpy(data, eebuf + eeprom->offset, eeprom->len);
+	kfree(eebuf);
+	return res;
+}
+
 static const struct ethtool_ops sis900_ethtool_ops = {
 	.get_drvinfo 	= sis900_get_drvinfo,
 	.get_msglevel	= sis900_get_msglevel,
@@ -2132,6 +2198,8 @@ static const struct ethtool_ops sis900_ethtool_ops = {
 	.set_wol	= sis900_set_wol,
 	.get_link_ksettings = sis900_get_link_ksettings,
 	.set_link_ksettings = sis900_set_link_ksettings,
+	.get_eeprom_len = sis900_get_eeprom_len,
+	.get_eeprom = sis900_get_eeprom,
 };
 
 /**
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH] sis900: add support for ethtool --eeprom-dump
From: Sergej Benilov @ 2019-07-25 19:52 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: venza, netdev
In-Reply-To: <20190725182029.GK21952@lunn.ch>

On Thu, 25 Jul 2019 at 20:20, Andrew Lunn <andrew@lunn.ch> wrote:
>
> On Thu, Jul 25, 2019 at 06:41:41PM +0200, Sergej Benilov wrote:
> > On Thu, 25 Jul 2019 at 18:25, Andrew Lunn <andrew@lunn.ch> wrote:
> > >
> > > > +static int sis900_read_eeprom(struct net_device *net_dev, u8 *buf)
> > > > +{
> > > > +     struct sis900_private *sis_priv = netdev_priv(net_dev);
> > > > +     void __iomem *ioaddr = sis_priv->ioaddr;
> > > > +     int wait, ret = -EAGAIN;
> > > > +     u16 signature;
> > > > +     u16 *ebuf = (u16 *)buf;
> > > > +     int i;
> > > > +
> > > > +     if (sis_priv->chipset_rev == SIS96x_900_REV) {
> > > > +             sw32(mear, EEREQ);
> > > > +             for (wait = 0; wait < 2000; wait++) {
> > > > +                     if (sr32(mear) & EEGNT) {
> > > > +                             /* read 16 bits, and index by 16 bits */
> > > > +                             for (i = 0; i < sis_priv->eeprom_size / 2; i++)
> > > > +                                     ebuf[i] = (u16)read_eeprom(ioaddr, i);
> > > > +                     ret = 0;
> > > > +                     break;
> > > > +                     }
> > > > +             udelay(1);
> > > > +             }
> > > > +     sw32(mear, EEDONE);
> > >
> > > The indentation looks all messed up here.
> >
> > This has passed ./scripts/checkpatch.pl, as you had suggested for the
> > previous patch.
>
> checkpatch just checks for things like tabs vs space.
>
> I would expect the indentation to be more like:
>
>
>         if (sis_priv->chipset_rev == SIS96x_900_REV) {
>                 sw32(mear, EEREQ);
>                 for (wait = 0; wait < 2000; wait++) {
>                         if (sr32(mear) & EEGNT) {
>                                 /* read 16 bits, and index by 16 bits */
>                                 for (i = 0; i < sis_priv->eeprom_size / 2; i++)
>                                         ebuf[i] = (u16)read_eeprom(ioaddr, i);
>                                 ret = 0;
>                                 break;
>                         }
>                         udelay(1);
>                 }
>                 sw32(mear, EEDONE);
>         } else {
>                 signature = (u16)read_eeprom(ioaddr, EEPROMSignature);
>                 if (signature != 0xffff && signature != 0x0000) {
>                         /* read 16 bits, and index by 16 bits */
>                         for (i = 0; i < sis_priv->eeprom_size / 2; i++)
>                                 ebuf[i] = (u16)read_eeprom(ioaddr, i);
>                         ret = 0;
>                 }
>         }
>         return ret;
>

Ok, I see now what you mean.
I fixed the alignment.

This patch is superseded.

> > > Why do you not put the data directly into data and avoid this memory
> > > allocation, and memcpy?
> >
> > Because EEPROM data from 'eeprom->offset' offset and of 'eeprom->len'
> > length only is expected to be returned in 'data'.
>
> O.K.
>
>         Andrew

^ permalink raw reply

* Re: [PATCH v6 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions
From: Jason Gunthorpe @ 2019-07-25 19:52 UTC (permalink / raw)
  To: Michal Kalderon
  Cc: Kamal Heib, Ariel Elior, dledford@redhat.com, galpress@amazon.com,
	linux-rdma@vger.kernel.org, davem@davemloft.net,
	netdev@vger.kernel.org
In-Reply-To: <MN2PR18MB3182469DB08CD20B56C9697FA1C10@MN2PR18MB3182.namprd18.prod.outlook.com>

On Thu, Jul 25, 2019 at 07:34:15PM +0000, Michal Kalderon wrote:
> > > +	ibdev_dbg(ucontext->device,
> > > +		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx]
> > removed\n",
> > > +		  entry->obj, key, entry->address, entry->length);
> > > +
> > > +	return entry;
> > > +}
> > > +EXPORT_SYMBOL(rdma_user_mmap_entry_get);
> > 
> > It is a mistake we keep making, and maybe the war is hopelessly lost now,
> > but functions called from a driver should not be part of the ib_uverbs module
> > - ideally uverbs is an optional module. They should be in ib_core.
> > 
> > Maybe put this in ib_core_uverbs.c ?

> But if there isn't ib_uverbs user apps can't be run right ? and then
> these functions Won't get called anyway ?

Right, but, we don't want loading the driver to force creating
/dev/infiniband/uverbs - so the driver support component of uverbs
should live in ib_core, and the /dev/ component should be in ib_uverbs

> > > +	xa_lock(&ucontext->mmap_xa);
> > > +	if (check_add_overflow(ucontext->mmap_xa_page,
> > > +			       (u32)(length >> PAGE_SHIFT),
> > 
> > Should this be divide round up ?

> For cases that length is not rounded to PAGE_SHIFT? 

It should never happen, but yes
 
> > 
> > > +			       &next_mmap_page))
> > > +		goto err_unlock;
> > 
> > I still don't like that this algorithm latches into a permanent failure when the
> > xa_page wraps.
> > 
> > It seems worth spending a bit more time here to tidy this.. Keep using the
> > mmap_xa_page scheme, but instead do something like
> > 
> > alloc_cyclic_range():
> > 
> > while () {
> >    // Find first empty element in a cyclic way
> >    xa_page_first = mmap_xa_page;
> >    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
> > 
> >    // Is there a enough room to have the range?
> >    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
> >       mmap_xa_page = 0;
> >       continue;
> >    }
> > 
> >    // See if the element before intersects
> >    elm = xa_find(xa, &zero, xa_page_end, 0);
> >    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm->last)) {
> >       mmap_xa_page = elm->last + 1;
> >       continue
> >    }
> > 
> >    // xa_page_first -> xa_page_end should now be free
> >    xa_insert(xa, xa_page_start, entry);
> >    mmap_xa_page = xa_page_end + 1;
> >    return xa_page_start;
> > }
> > 
> > Approximately, please check it.

> But we don't free entires from the xa_array ( only when ucontext is destroyed) so how will 
> There be an empty element after we wrap ?  

Oh!

That should be fixed up too, in the general case if a user is
creating/destroying driver objects in loop we don't want memory usage
to be unbounded.

The rdma_user_mmap stuff has VMA ops that can refcount the xa entry
and now that this is core code it is easy enough to harmonize the two
things and track the xa side from the struct rdma_umap_priv

The question is, does EFA or qedr have a use model for this that
allows a userspace verb to create/destroy in a loop? ie do we need to
fix this right now?

Jason

^ permalink raw reply

* Re: [PATCH bpf-next v2 1/7] bpf/flow_dissector: pass input flags to BPF flow dissector program
From: Alexei Starovoitov @ 2019-07-25 19:58 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: netdev, bpf, davem, ast, daniel, Willem de Bruijn, Song Liu,
	Petar Penkov
In-Reply-To: <20190725153342.3571-2-sdf@google.com>

On Thu, Jul 25, 2019 at 08:33:36AM -0700, Stanislav Fomichev wrote:
> C flow dissector supports input flags that tell it to customize parsing
> by either stopping early or trying to parse as deep as possible. Pass
> those flags to the BPF flow dissector so it can make the same
> decisions. In the next commits I'll add support for those flags to
> our reference bpf_flow.c
> 
> Acked-by: Willem de Bruijn <willemb@google.com>
> Acked-by: Song Liu <songliubraving@fb.com>
> Cc: Song Liu <songliubraving@fb.com>
> Cc: Willem de Bruijn <willemb@google.com>
> Cc: Petar Penkov <ppenkov@google.com>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  include/linux/skbuff.h       | 2 +-
>  include/net/flow_dissector.h | 4 ----
>  include/uapi/linux/bpf.h     | 5 +++++
>  net/bpf/test_run.c           | 2 +-
>  net/core/flow_dissector.c    | 5 +++--
>  5 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 718742b1c505..9b7a8038beec 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -1271,7 +1271,7 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
>  
>  struct bpf_flow_dissector;
>  bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
> -		      __be16 proto, int nhoff, int hlen);
> +		      __be16 proto, int nhoff, int hlen, unsigned int flags);
>  
>  bool __skb_flow_dissect(const struct net *net,
>  			const struct sk_buff *skb,
> diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> index 90bd210be060..3e2642587b76 100644
> --- a/include/net/flow_dissector.h
> +++ b/include/net/flow_dissector.h
> @@ -253,10 +253,6 @@ enum flow_dissector_key_id {
>  	FLOW_DISSECTOR_KEY_MAX,
>  };
>  
> -#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG		BIT(0)
> -#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL	BIT(1)
> -#define FLOW_DISSECTOR_F_STOP_AT_ENCAP		BIT(2)
> -
>  struct flow_dissector_key {
>  	enum flow_dissector_key_id key_id;
>  	size_t offset; /* offset of struct flow_dissector_key_*
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index fa1c753dcdbc..b4ad19bd6aa8 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3507,6 +3507,10 @@ enum bpf_task_fd_type {
>  	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
>  };
>  
> +#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG		(1U << 0)
> +#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL	(1U << 1)
> +#define FLOW_DISSECTOR_F_STOP_AT_ENCAP		(1U << 2)
> +

I'm a bit concerned with direct move.
Last time we were in similar situation we've created:
enum {
        BPF_TCP_ESTABLISHED = 1,
        BPF_TCP_SYN_SENT,

and added:
        BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
        BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);

It may be overkill here, but feels safer than direct move.
Adding BPF_ prefix also feels necessary to avoid very unlikely
(but still theoretically possible) conflicts.


^ permalink raw reply

* Re: [PATCH bpf-next v2 1/7] bpf/flow_dissector: pass input flags to BPF flow dissector program
From: Stanislav Fomichev @ 2019-07-25 20:03 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Stanislav Fomichev, netdev, bpf, davem, ast, daniel,
	Willem de Bruijn, Song Liu, Petar Penkov
In-Reply-To: <20190725195856.ttdt75dxwhawjqvi@ast-mbp>

On 07/25, Alexei Starovoitov wrote:
> On Thu, Jul 25, 2019 at 08:33:36AM -0700, Stanislav Fomichev wrote:
> > C flow dissector supports input flags that tell it to customize parsing
> > by either stopping early or trying to parse as deep as possible. Pass
> > those flags to the BPF flow dissector so it can make the same
> > decisions. In the next commits I'll add support for those flags to
> > our reference bpf_flow.c
> > 
> > Acked-by: Willem de Bruijn <willemb@google.com>
> > Acked-by: Song Liu <songliubraving@fb.com>
> > Cc: Song Liu <songliubraving@fb.com>
> > Cc: Willem de Bruijn <willemb@google.com>
> > Cc: Petar Penkov <ppenkov@google.com>
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  include/linux/skbuff.h       | 2 +-
> >  include/net/flow_dissector.h | 4 ----
> >  include/uapi/linux/bpf.h     | 5 +++++
> >  net/bpf/test_run.c           | 2 +-
> >  net/core/flow_dissector.c    | 5 +++--
> >  5 files changed, 10 insertions(+), 8 deletions(-)
> > 
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 718742b1c505..9b7a8038beec 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -1271,7 +1271,7 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
> >  
> >  struct bpf_flow_dissector;
> >  bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
> > -		      __be16 proto, int nhoff, int hlen);
> > +		      __be16 proto, int nhoff, int hlen, unsigned int flags);
> >  
> >  bool __skb_flow_dissect(const struct net *net,
> >  			const struct sk_buff *skb,
> > diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> > index 90bd210be060..3e2642587b76 100644
> > --- a/include/net/flow_dissector.h
> > +++ b/include/net/flow_dissector.h
> > @@ -253,10 +253,6 @@ enum flow_dissector_key_id {
> >  	FLOW_DISSECTOR_KEY_MAX,
> >  };
> >  
> > -#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG		BIT(0)
> > -#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL	BIT(1)
> > -#define FLOW_DISSECTOR_F_STOP_AT_ENCAP		BIT(2)
> > -
> >  struct flow_dissector_key {
> >  	enum flow_dissector_key_id key_id;
> >  	size_t offset; /* offset of struct flow_dissector_key_*
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index fa1c753dcdbc..b4ad19bd6aa8 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -3507,6 +3507,10 @@ enum bpf_task_fd_type {
> >  	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
> >  };
> >  
> > +#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG		(1U << 0)
> > +#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL	(1U << 1)
> > +#define FLOW_DISSECTOR_F_STOP_AT_ENCAP		(1U << 2)
> > +
> 
> I'm a bit concerned with direct move.
> Last time we were in similar situation we've created:
> enum {
>         BPF_TCP_ESTABLISHED = 1,
>         BPF_TCP_SYN_SENT,
> 
> and added:
>         BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
>         BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
> 
> It may be overkill here, but feels safer than direct move.
> Adding BPF_ prefix also feels necessary to avoid very unlikely
> (but still theoretically possible) conflicts.
Sounds good, thanks for the pointers, will do the same here!

^ permalink raw reply

* Re: [PATCH] sis900: add support for ethtool's EEPROM dump
From: Andrew Lunn @ 2019-07-25 20:03 UTC (permalink / raw)
  To: Sergej Benilov; +Cc: venza, netdev
In-Reply-To: <20190725194806.17964-1-sergej.benilov@googlemail.com>

On Thu, Jul 25, 2019 at 09:48:06PM +0200, Sergej Benilov wrote:
> Implement ethtool's EEPROM dump command (ethtool -e|--eeprom-dump).
> 
> Thx to Andrew Lunn for comments.
> 
> Signed-off-by: Sergej Benilov <sergej.benilov@googlemail.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* [pull request][net 0/9] Mellanox, mlx5 fixes 2019-07-25
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev@vger.kernel.org, Jakub Kicinski, Saeed Mahameed

Hi Dave,

This series introduces some fixes to mlx5 driver.

1) Ariel is addressing an issue with enacp flow counter race condition
2) Aya fixes ethtool speed handling
3) Edward fixes modify_cq hw bits alignment 
4) Maor fixes RDMA_RX capabilities handling
5) Mark reverses unregister devices order to address an issue with LAG
6) From Tariq,
  - wrong max num channels indication regression
  - TLS counters naming and documentation as suggested by Jakub
  - kTLS, Call WARN_ONCE on netdev mismatch

There is one patch in this series that touches nfp driver to align
TLS statistics names with latest documentation, Jakub is CC'ed.

Please pull and let me know if there is any problem.

For -stable v4.9:
  ('net/mlx5: Use reversed order when unregister devices')

For -stable v4.20
  ('net/mlx5e: Prevent encap flow counter update async to user query')
  ('net/mlx5: Fix modify_cq_in alignment')

For -stable v5.1
  ('net/mlx5e: Fix matching of speed to PRM link modes')

For -stable v5.2
  ('net/mlx5: Add missing RDMA_RX capabilities')

Thanks,
Saeed.

---
The following changes since commit 5f9e832c137075045d15cd6899ab0505cfb2ca4b:

  Linus 5.3-rc1 (2019-07-21 14:05:38 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-fixes-2019-07-25

for you to fetch changes up to 280c089916228a005af7f95c1716ea1fea1027b5:

  Documentation: TLS: fix stat counters description (2019-07-25 13:31:01 -0700)

----------------------------------------------------------------
mlx5-fixes-2019-07-25

----------------------------------------------------------------
Ariel Levkovich (1):
      net/mlx5e: Prevent encap flow counter update async to user query

Aya Levin (1):
      net/mlx5e: Fix matching of speed to PRM link modes

Edward Srouji (1):
      net/mlx5: Fix modify_cq_in alignment

Maor Gottlieb (1):
      net/mlx5: Add missing RDMA_RX capabilities

Mark Zhang (1):
      net/mlx5: Use reversed order when unregister devices

Tariq Toukan (4):
      net/mlx5e: Fix wrong max num channels indication
      net/mlx5e: kTLS, Call WARN_ONCE on netdev mismatch
      nfp: tls: rename tls packet counters
      Documentation: TLS: fix stat counters description

 Documentation/networking/tls-offload.rst           | 23 ++++++--
 drivers/net/ethernet/mellanox/mlx5/core/dev.c      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h       | 12 +---
 .../net/ethernet/mellanox/mlx5/core/en/params.h    |  5 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/port.c  | 27 ++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en/port.h  |  6 +-
 .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 69 +++++++++++++++-------
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c    |  3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 36 +++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c |  8 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  4 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  5 +-
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  |  5 ++
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  |  7 +--
 .../ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c |  1 +
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   |  4 +-
 include/linux/mlx5/fs.h                            |  1 +
 include/linux/mlx5/mlx5_ifc.h                      |  6 +-
 20 files changed, 139 insertions(+), 89 deletions(-)

^ permalink raw reply

* [net 1/9] net/mlx5: Use reversed order when unregister devices
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Jakub Kicinski, Mark Zhang, Parav Pandit,
	Leon Romanovsky, Saeed Mahameed
In-Reply-To: <20190725203618.11011-1-saeedm@mellanox.com>

From: Mark Zhang <markz@mellanox.com>

When lag is active, which is controlled by the bonded mlx5e netdev, mlx5
interface unregestering must happen in the reverse order where rdma is
unregistered (unloaded) first, to guarantee all references to the lag
context in hardware is removed, then remove mlx5e netdev interface which
will cleanup the lag context from hardware.

Without this fix during destroy of LAG interface, we observed following
errors:
 * mlx5_cmd_check:752:(pid 12556): DESTROY_LAG(0x843) op_mod(0x0) failed,
   status bad parameter(0x3), syndrome (0xe4ac33)
 * mlx5_cmd_check:752:(pid 12556): DESTROY_LAG(0x843) op_mod(0x0) failed,
   status bad parameter(0x3), syndrome (0xa5aee8).

Fixes: a31208b1e11d ("net/mlx5_core: New init and exit flow for mlx5_core")
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 5bb6a26ea267..50862275544e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -213,7 +213,7 @@ void mlx5_unregister_device(struct mlx5_core_dev *dev)
 	struct mlx5_interface *intf;
 
 	mutex_lock(&mlx5_intf_mutex);
-	list_for_each_entry(intf, &intf_list, list)
+	list_for_each_entry_reverse(intf, &intf_list, list)
 		mlx5_remove_device(intf, priv);
 	list_del(&priv->dev_list);
 	mutex_unlock(&mlx5_intf_mutex);
-- 
2.21.0


^ permalink raw reply related

* [net 2/9] net/mlx5: Add missing RDMA_RX capabilities
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Jakub Kicinski, Maor Gottlieb,
	Saeed Mahameed
In-Reply-To: <20190725203618.11011-1-saeedm@mellanox.com>

From: Maor Gottlieb <maorg@mellanox.com>

New flow table type RDMA_RX was added but the MLX5_CAP_FLOW_TABLE_TYPE
didn't handle this new flow table type.
This means that MLX5_CAP_FLOW_TABLE_TYPE returns an empty capability to
this flow table type.

Update both the macro and the maximum supported flow table type to
RDMA_RX.

Fixes: d83eb50e29de ("net/mlx5: Add support in RDMA RX steering")
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index c48c382f926f..c1252d6be0ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -68,7 +68,7 @@ enum fs_flow_table_type {
 	FS_FT_SNIFFER_RX	= 0X5,
 	FS_FT_SNIFFER_TX	= 0X6,
 	FS_FT_RDMA_RX		= 0X7,
-	FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX,
+	FS_FT_MAX_TYPE = FS_FT_RDMA_RX,
 };
 
 enum fs_flow_table_op_mod {
@@ -275,7 +275,8 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
 	(type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) :		\
 	(type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) :		\
 	(type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) :		\
-	(BUILD_BUG_ON_ZERO(FS_FT_SNIFFER_TX != FS_FT_MAX_TYPE))\
+	(type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) :		\
+	(BUILD_BUG_ON_ZERO(FS_FT_RDMA_RX != FS_FT_MAX_TYPE))\
 	)
 
 #endif
-- 
2.21.0


^ permalink raw reply related

* [net 3/9] net/mlx5: Fix modify_cq_in alignment
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Jakub Kicinski, Edward Srouji,
	stable@vger.kernel.org, Yishai Hadas, Leon Romanovsky,
	Saeed Mahameed
In-Reply-To: <20190725203618.11011-1-saeedm@mellanox.com>

From: Edward Srouji <edwards@mellanox.com>

Fix modify_cq_in alignment to match the device specification.
After this fix the 'cq_umem_valid' field will be in the right offset.

Cc: <stable@vger.kernel.org> # 4.19
Fixes: bd37197554eb ("net/mlx5: Update mlx5_ifc with DEVX UID bits")
Signed-off-by: Edward Srouji <edwards@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b3d5752657d9..ec571fd7fcf8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5975,10 +5975,12 @@ struct mlx5_ifc_modify_cq_in_bits {
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_at_280[0x40];
+	u8         reserved_at_280[0x60];
 
 	u8         cq_umem_valid[0x1];
-	u8         reserved_at_2c1[0x5bf];
+	u8         reserved_at_2e1[0x1f];
+
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
-- 
2.21.0


^ permalink raw reply related

* [net 4/9] net/mlx5e: Fix wrong max num channels indication
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Jakub Kicinski, Tariq Toukan,
	Saeed Mahameed
In-Reply-To: <20190725203618.11011-1-saeedm@mellanox.com>

From: Tariq Toukan <tariqt@mellanox.com>

No XSK support in the enhanced IPoIB driver and representors.
Add a profile property to specify this, and enhance the logic
that calculates the max number of channels to take it into
account.

Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 12 ++-----
 .../ethernet/mellanox/mlx5/core/en/params.h   |  5 +--
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  |  2 +-
 .../mellanox/mlx5/core/en_fs_ethtool.c        |  3 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 36 +++++++++----------
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  2 ++
 .../ethernet/mellanox/mlx5/core/en_stats.c    |  8 ++---
 .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c |  7 ++--
 .../mellanox/mlx5/core/ipoib/ipoib_vlan.c     |  1 +
 9 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 79d93d6c7d7a..ce1be2a84231 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -159,7 +159,7 @@ do {                                                            \
 enum mlx5e_rq_group {
 	MLX5E_RQ_GROUP_REGULAR,
 	MLX5E_RQ_GROUP_XSK,
-	MLX5E_NUM_RQ_GROUPS /* Keep last. */
+#define MLX5E_NUM_RQ_GROUPS(g) (1 + MLX5E_RQ_GROUP_##g)
 };
 
 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
@@ -182,14 +182,6 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 		min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
 }
 
-/* Use this function to get max num channels after netdev was created */
-static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev)
-{
-	return min_t(unsigned int,
-		     netdev->num_rx_queues / MLX5E_NUM_RQ_GROUPS,
-		     netdev->num_tx_queues);
-}
-
 struct mlx5e_tx_wqe {
 	struct mlx5_wqe_ctrl_seg ctrl;
 	struct mlx5_wqe_eth_seg  eth;
@@ -830,6 +822,7 @@ struct mlx5e_priv {
 	struct net_device         *netdev;
 	struct mlx5e_stats         stats;
 	struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS];
+	u16                        max_nch;
 	u8                         max_opened_tc;
 	struct hwtstamp_config     tstamp;
 	u16                        q_counter;
@@ -871,6 +864,7 @@ struct mlx5e_profile {
 		mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
 	} rx_handlers;
 	int	max_tc;
+	u8	rq_groups;
 };
 
 void mlx5e_build_ptys2ethtool_map(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index bd882b5ee9a7..3a615d663d84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -66,9 +66,10 @@ static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
 	*group = qid / nch;
 }
 
-static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid)
+static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
+				      struct mlx5e_params *params, u64 qid)
 {
-	return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS;
+	return qid < params->num_channels * profile->rq_groups;
 }
 
 /* Parameter calculations */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 126ec4181286..ed25757ac5bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -391,7 +391,7 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
 {
 	mutex_lock(&priv->state_lock);
 
-	ch->max_combined   = mlx5e_get_netdev_max_channels(priv->netdev);
+	ch->max_combined   = priv->max_nch;
 	ch->combined_count = priv->channels.params.num_channels;
 	if (priv->xsk.refcnt) {
 		/* The upper half are XSK queues. */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index ea3a490b569a..94304abc49e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -611,7 +611,8 @@ static int validate_flow(struct mlx5e_priv *priv,
 		return -ENOSPC;
 
 	if (fs->ring_cookie != RX_CLS_FLOW_DISC)
-		if (!mlx5e_qid_validate(&priv->channels.params, fs->ring_cookie))
+		if (!mlx5e_qid_validate(priv->profile, &priv->channels.params,
+					fs->ring_cookie))
 			return -EINVAL;
 
 	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 47eea6b3a1c3..570c42b7eeea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1677,10 +1677,10 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c,
 			  struct mlx5e_channel_param *cparam)
 {
 	struct mlx5e_priv *priv = c->priv;
-	int err, tc, max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
+	int err, tc;
 
 	for (tc = 0; tc < params->num_tc; tc++) {
-		int txq_ix = c->ix + tc * max_nch;
+		int txq_ix = c->ix + tc * priv->max_nch;
 
 		err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix,
 				       params, &cparam->sq, &c->sq[tc], tc);
@@ -2438,11 +2438,10 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
 
 int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 {
-	const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
 	int err;
 	int ix;
 
-	for (ix = 0; ix < max_nch; ix++) {
+	for (ix = 0; ix < priv->max_nch; ix++) {
 		err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
 		if (unlikely(err))
 			goto err_destroy_rqts;
@@ -2460,10 +2459,9 @@ int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 
 void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 {
-	const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
 	int i;
 
-	for (i = 0; i < max_nch; i++)
+	for (i = 0; i < priv->max_nch; i++)
 		mlx5e_destroy_rqt(priv, &tirs[i].rqt);
 }
 
@@ -2557,7 +2555,7 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
 		mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
 	}
 
-	for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) {
+	for (ix = 0; ix < priv->max_nch; ix++) {
 		struct mlx5e_redirect_rqt_param direct_rrp = {
 			.is_rss = false,
 			{
@@ -2758,7 +2756,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
 			goto free_in;
 	}
 
-	for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) {
+	for (ix = 0; ix < priv->max_nch; ix++) {
 		err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn,
 					   in, inlen);
 		if (err)
@@ -2858,12 +2856,11 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
 
 static void mlx5e_build_tc2txq_maps(struct mlx5e_priv *priv)
 {
-	int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
 	int i, tc;
 
-	for (i = 0; i < max_nch; i++)
+	for (i = 0; i < priv->max_nch; i++)
 		for (tc = 0; tc < priv->profile->max_tc; tc++)
-			priv->channel_tc2txq[i][tc] = i + tc * max_nch;
+			priv->channel_tc2txq[i][tc] = i + tc * priv->max_nch;
 }
 
 static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv)
@@ -2884,7 +2881,7 @@ static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv)
 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
 {
 	int num_txqs = priv->channels.num * priv->channels.params.num_tc;
-	int num_rxqs = priv->channels.num * MLX5E_NUM_RQ_GROUPS;
+	int num_rxqs = priv->channels.num * priv->profile->rq_groups;
 	struct net_device *netdev = priv->netdev;
 
 	mlx5e_netdev_set_tcs(netdev);
@@ -3306,7 +3303,6 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
 
 int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 {
-	const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
 	struct mlx5e_tir *tir;
 	void *tirc;
 	int inlen;
@@ -3319,7 +3315,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 	if (!in)
 		return -ENOMEM;
 
-	for (ix = 0; ix < max_nch; ix++) {
+	for (ix = 0; ix < priv->max_nch; ix++) {
 		memset(in, 0, inlen);
 		tir = &tirs[ix];
 		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
@@ -3358,10 +3354,9 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
 
 void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 {
-	const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
 	int i;
 
-	for (i = 0; i < max_nch; i++)
+	for (i = 0; i < priv->max_nch; i++)
 		mlx5e_destroy_tir(priv->mdev, &tirs[i]);
 }
 
@@ -3487,7 +3482,7 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
 {
 	int i;
 
-	for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) {
+	for (i = 0; i < priv->max_nch; i++) {
 		struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
 		struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
 		struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
@@ -4960,8 +4955,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 		return err;
 
 	mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params,
-			       mlx5e_get_netdev_max_channels(netdev),
-			       netdev->mtu);
+			       priv->max_nch, netdev->mtu);
 
 	mlx5e_timestamp_init(priv);
 
@@ -5164,6 +5158,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
 	.rx_handlers.handle_rx_cqe       = mlx5e_handle_rx_cqe,
 	.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
 	.max_tc		   = MLX5E_MAX_NUM_TC,
+	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(XSK),
 };
 
 /* mlx5e generic netdev management API (move to en_common.c) */
@@ -5181,6 +5176,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
 	priv->profile     = profile;
 	priv->ppriv       = ppriv;
 	priv->msglevel    = MLX5E_MSG_LEVEL;
+	priv->max_nch     = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
 	priv->max_opened_tc = 1;
 
 	mutex_init(&priv->state_lock);
@@ -5218,7 +5214,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 
 	netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
 				    nch * profile->max_tc,
-				    nch * MLX5E_NUM_RQ_GROUPS);
+				    nch * profile->rq_groups);
 	if (!netdev) {
 		mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
 		return NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 7245d287633d..731819a26a0c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1702,6 +1702,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
 	.rx_handlers.handle_rx_cqe       = mlx5e_handle_rx_cqe_rep,
 	.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
 	.max_tc			= 1,
+	.rq_groups		= MLX5E_NUM_RQ_GROUPS(REGULAR),
 };
 
 static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
@@ -1719,6 +1720,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
 	.rx_handlers.handle_rx_cqe       = mlx5e_handle_rx_cqe_rep,
 	.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
 	.max_tc			= MLX5E_MAX_NUM_TC,
+	.rq_groups		= MLX5E_NUM_RQ_GROUPS(REGULAR),
 };
 
 static bool
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 539b4d3656da..57f9f346d213 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -172,7 +172,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 
 	memset(s, 0, sizeof(*s));
 
-	for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) {
+	for (i = 0; i < priv->max_nch; i++) {
 		struct mlx5e_channel_stats *channel_stats =
 			&priv->channel_stats[i];
 		struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
@@ -1395,7 +1395,7 @@ static const struct counter_desc ch_stats_desc[] = {
 
 static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
 {
-	int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
+	int max_nch = priv->max_nch;
 
 	return (NUM_RQ_STATS * max_nch) +
 	       (NUM_CH_STATS * max_nch) +
@@ -1409,8 +1409,8 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
 static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
 					   int idx)
 {
-	int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
 	bool is_xsk = priv->xsk.ever_used;
+	int max_nch = priv->max_nch;
 	int i, j, tc;
 
 	for (i = 0; i < max_nch; i++)
@@ -1452,8 +1452,8 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
 static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
 					 int idx)
 {
-	int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
 	bool is_xsk = priv->xsk.ever_used;
+	int max_nch = priv->max_nch;
 	int i, j, tc;
 
 	for (i = 0; i < max_nch; i++)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 6bfaaab362dc..1a2560e3bf7c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -88,8 +88,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
 	netdev->mtu = netdev->max_mtu;
 
 	mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params,
-			       mlx5e_get_netdev_max_channels(netdev),
-			       netdev->mtu);
+			       priv->max_nch, netdev->mtu);
 	mlx5i_build_nic_params(mdev, &priv->channels.params);
 
 	mlx5e_timestamp_init(priv);
@@ -118,11 +117,10 @@ void mlx5i_cleanup(struct mlx5e_priv *priv)
 
 static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
 {
-	int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
 	struct mlx5e_sw_stats s = { 0 };
 	int i, j;
 
-	for (i = 0; i < max_nch; i++) {
+	for (i = 0; i < priv->max_nch; i++) {
 		struct mlx5e_channel_stats *channel_stats;
 		struct mlx5e_rq_stats *rq_stats;
 
@@ -436,6 +434,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
 	.rx_handlers.handle_rx_cqe       = mlx5i_handle_rx_cqe,
 	.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
 	.max_tc		   = MLX5I_MAX_NUM_TC,
+	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(REGULAR),
 };
 
 /* mlx5i netdev NDos */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index 6e56fa769d2e..c5a491e22e55 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -355,6 +355,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
 	.rx_handlers.handle_rx_cqe       = mlx5i_handle_rx_cqe,
 	.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
 	.max_tc		   = MLX5I_MAX_NUM_TC,
+	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(REGULAR),
 };
 
 const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
-- 
2.21.0


^ permalink raw reply related

* [net 5/9] net/mlx5e: Fix matching of speed to PRM link modes
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Jakub Kicinski, Aya Levin, Saeed Mahameed
In-Reply-To: <20190725203618.11011-1-saeedm@mellanox.com>

From: Aya Levin <ayal@mellanox.com>

Speed translation is performed based on legacy or extended PTYS
register. Translate speed with respect to:
1) Capability bit of extended PTYS table.
2) User request:
 a) When auto-negotiation is turned on, inspect advertisement whether it
 contains extended link modes.
 b) When auto-negotiation is turned off, speed > 100Gbps (maximal
 speed supported in legacy mode).
With both conditions fulfilled translation is done with extended PTYS
table otherwise use legacy PTYS table.
Without this patch 25/50/100 Gbps speed cannot be set, since try to
configure in extended mode but read from legacy mode.

Fixes: dd1b9e09c12b ("net/mlx5: ethtool, Allow legacy link-modes configuration via non-extended ptys")
Signed-off-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/port.c | 27 +++++---
 .../net/ethernet/mellanox/mlx5/core/en/port.h |  6 +-
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 67 +++++++++++++------
 3 files changed, 68 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index d5e5afbdca6d..f777994f3005 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -78,9 +78,10 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
 };
 
 static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
-				     const u32 **arr, u32 *size)
+				     const u32 **arr, u32 *size,
+				     bool force_legacy)
 {
-	bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+	bool ext = force_legacy ? false : MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
 
 	*size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
 		      ARRAY_SIZE(mlx5e_link_speed);
@@ -152,7 +153,8 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
 			    sizeof(out), MLX5_REG_PTYS, 0, 1);
 }
 
-u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper)
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+			  bool force_legacy)
 {
 	unsigned long temp = eth_proto_oper;
 	const u32 *table;
@@ -160,7 +162,7 @@ u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper)
 	u32 max_size;
 	int i;
 
-	mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+	mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
 	i = find_first_bit(&temp, max_size);
 	if (i < max_size)
 		speed = table[i];
@@ -170,6 +172,7 @@ u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper)
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 {
 	struct mlx5e_port_eth_proto eproto;
+	bool force_legacy = false;
 	bool ext;
 	int err;
 
@@ -177,8 +180,13 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 	err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
 	if (err)
 		goto out;
-
-	*speed = mlx5e_port_ptys2speed(mdev, eproto.oper);
+	if (ext && !eproto.admin) {
+		force_legacy = true;
+		err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto);
+		if (err)
+			goto out;
+	}
+	*speed = mlx5e_port_ptys2speed(mdev, eproto.oper, force_legacy);
 	if (!(*speed))
 		err = -EINVAL;
 
@@ -201,7 +209,7 @@ int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 	if (err)
 		return err;
 
-	mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+	mlx5e_port_get_speed_arr(mdev, &table, &max_size, false);
 	for (i = 0; i < max_size; ++i)
 		if (eproto.cap & MLX5E_PROT_MASK(i))
 			max_speed = max(max_speed, table[i]);
@@ -210,14 +218,15 @@ int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 	return 0;
 }
 
-u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed)
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+			       bool force_legacy)
 {
 	u32 link_modes = 0;
 	const u32 *table;
 	u32 max_size;
 	int i;
 
-	mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+	mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
 	for (i = 0; i < max_size; ++i) {
 		if (table[i] == speed)
 			link_modes |= MLX5E_PROT_MASK(i);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index 70f536ec51c4..4a7f4497692b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -48,10 +48,12 @@ void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
 				 u8 *an_disable_cap, u8 *an_disable_admin);
 int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
 			   u32 proto_admin, bool ext);
-u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper);
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+			  bool force_legacy);
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed);
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+			       bool force_legacy);
 
 int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
 int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index ed25757ac5bd..03bed714bac3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -785,7 +785,7 @@ static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings
 }
 
 static void get_speed_duplex(struct net_device *netdev,
-			     u32 eth_proto_oper,
+			     u32 eth_proto_oper, bool force_legacy,
 			     struct ethtool_link_ksettings *link_ksettings)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -795,7 +795,7 @@ static void get_speed_duplex(struct net_device *netdev,
 	if (!netif_carrier_ok(netdev))
 		goto out;
 
-	speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper);
+	speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy);
 	if (!speed) {
 		speed = SPEED_UNKNOWN;
 		goto out;
@@ -914,8 +914,8 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
 	/* Fields: eth_proto_admin and ext_eth_proto_admin  are
 	 * mutually exclusive. Hence try reading legacy advertising
 	 * when extended advertising is zero.
-	 * admin_ext indicates how eth_proto_admin should be
-	 * interpreted
+	 * admin_ext indicates which proto_admin (ext vs. legacy)
+	 * should be read and interpreted
 	 */
 	admin_ext = ext;
 	if (ext && !eth_proto_admin) {
@@ -924,7 +924,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
 		admin_ext = false;
 	}
 
-	eth_proto_oper   = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+	eth_proto_oper   = MLX5_GET_ETH_PROTO(ptys_reg, out, admin_ext,
 					      eth_proto_oper);
 	eth_proto_lp	    = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
 	an_disable_admin    = MLX5_GET(ptys_reg, out, an_disable_admin);
@@ -939,7 +939,8 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
 	get_supported(mdev, eth_proto_cap, link_ksettings);
 	get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings,
 			admin_ext);
-	get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings);
+	get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext,
+			 link_ksettings);
 
 	eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
 
@@ -1016,45 +1017,69 @@ static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes)
 	return ptys_modes;
 }
 
+static bool ext_link_mode_requested(const unsigned long *adver)
+{
+#define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT
+	int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT;
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(modes);
+
+	bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size);
+	return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static bool ext_speed_requested(u32 speed)
+{
+#define MLX5E_MAX_PTYS_LEGACY_SPEED 100000
+	return !!(speed > MLX5E_MAX_PTYS_LEGACY_SPEED);
+}
+
+static bool ext_requested(u8 autoneg, const unsigned long *adver, u32 speed)
+{
+	bool ext_link_mode = ext_link_mode_requested(adver);
+	bool ext_speed = ext_speed_requested(speed);
+
+	return  autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_speed;
+}
+
 int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 				     const struct ethtool_link_ksettings *link_ksettings)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_port_eth_proto eproto;
+	const unsigned long *adver;
 	bool an_changes = false;
 	u8 an_disable_admin;
 	bool ext_supported;
-	bool ext_requested;
 	u8 an_disable_cap;
 	bool an_disable;
 	u32 link_modes;
 	u8 an_status;
+	u8 autoneg;
 	u32 speed;
+	bool ext;
 	int err;
 
 	u32 (*ethtool2ptys_adver_func)(const unsigned long *adver);
 
-#define MLX5E_PTYS_EXT ((1ULL << ETHTOOL_LINK_MODE_50000baseKR_Full_BIT) - 1)
+	adver = link_ksettings->link_modes.advertising;
+	autoneg = link_ksettings->base.autoneg;
+	speed = link_ksettings->base.speed;
 
-	ext_requested = !!(link_ksettings->link_modes.advertising[0] >
-			MLX5E_PTYS_EXT ||
-			link_ksettings->link_modes.advertising[1]);
+	ext = ext_requested(autoneg, adver, speed),
 	ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
-	ext_requested &= ext_supported;
+	if (!ext_supported && ext)
+		return -EOPNOTSUPP;
 
-	speed = link_ksettings->base.speed;
-	ethtool2ptys_adver_func = ext_requested ?
-				  mlx5e_ethtool2ptys_ext_adver_link :
+	ethtool2ptys_adver_func = ext ? mlx5e_ethtool2ptys_ext_adver_link :
 				  mlx5e_ethtool2ptys_adver_link;
-	err = mlx5_port_query_eth_proto(mdev, 1, ext_requested, &eproto);
+	err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
 	if (err) {
 		netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n",
 			   __func__, err);
 		goto out;
 	}
-	link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
-		ethtool2ptys_adver_func(link_ksettings->link_modes.advertising) :
-		mlx5e_port_speed2linkmodes(mdev, speed);
+	link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
+		mlx5e_port_speed2linkmodes(mdev, speed, !ext);
 
 	link_modes = link_modes & eproto.cap;
 	if (!link_modes) {
@@ -1067,14 +1092,14 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 	mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap,
 				    &an_disable_admin);
 
-	an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE;
+	an_disable = autoneg == AUTONEG_DISABLE;
 	an_changes = ((!an_disable && an_disable_admin) ||
 		      (an_disable && !an_disable_admin));
 
 	if (!an_changes && link_modes == eproto.admin)
 		goto out;
 
-	mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_requested);
+	mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext);
 	mlx5_toggle_port_link(mdev);
 
 out:
-- 
2.21.0


^ permalink raw reply related

* [net 6/9] net/mlx5e: Prevent encap flow counter update async to user query
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Jakub Kicinski, Ariel Levkovich,
	Roi Dayan, Saeed Mahameed
In-Reply-To: <20190725203618.11011-1-saeedm@mellanox.com>

From: Ariel Levkovich <lariel@mellanox.com>

This patch prevents a race between user invoked cached counters
query and a neighbor last usage updater.

The cached flow counter stats can be queried by calling
"mlx5_fc_query_cached" which provides the number of bytes and
packets that passed via this flow since the last time this counter
was queried.
It does so by reducting the last saved stats from the current, cached
stats and then updating the last saved stats with the cached stats.
It also provide the lastuse value for that flow.

Since "mlx5e_tc_update_neigh_used_value" needs to retrieve the
last usage time of encapsulation flows, it calls the flow counter
query method periodically and async to user queries of the flow counter
using cls_flower.
This call is causing the driver to update the last reported bytes and
packets from the cache and therefore, future user queries of the flow
stats will return lower than expected number for bytes and packets
since the last saved stats in the driver was updated async to the last
saved stats in cls_flower.

This causes wrong stats presentation of encapsulation flows to user.

Since the neighbor usage updater only needs the lastuse stats from the
cached counter, the fix is to use a dedicated lastuse query call that
returns the lastuse value without synching between the cached stats and
the last saved stats.

Fixes: f6dfb4c3f216 ("net/mlx5e: Update neighbour 'used' state using HW flow rules counters")
Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c       | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 5 +++++
 include/linux/mlx5/fs.h                               | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index cc096f6011d9..7ecfc53cf5f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1230,13 +1230,13 @@ static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 {
 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
-	u64 bytes, packets, lastuse = 0;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5e_encap_entry *e;
 	struct mlx5_fc *counter;
 	struct neigh_table *tbl;
 	bool neigh_used = false;
 	struct neighbour *n;
+	u64 lastuse;
 
 	if (m_neigh->family == AF_INET)
 		tbl = &arp_tbl;
@@ -1256,7 +1256,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 					    encaps[efi->index]);
 			if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 				counter = mlx5e_tc_get_counter(flow);
-				mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+				lastuse = mlx5_fc_query_lastuse(counter);
 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
 					neigh_used = true;
 					break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index b3762123a69c..1834d9f3aa1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -369,6 +369,11 @@ int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
 }
 EXPORT_SYMBOL(mlx5_fc_query);
 
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter)
+{
+	return counter->cache.lastuse;
+}
+
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse)
 {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 04a569568eac..f049af3f3cd8 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -220,6 +220,7 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
 
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse);
 int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
-- 
2.21.0


^ permalink raw reply related

* [net 7/9] net/mlx5e: kTLS, Call WARN_ONCE on netdev mismatch
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Jakub Kicinski, Tariq Toukan,
	Saeed Mahameed
In-Reply-To: <20190725203618.11011-1-saeedm@mellanox.com>

From: Tariq Toukan <tariqt@mellanox.com>

A netdev mismatch in the processed TLS SKB should not occur,
and indicates a kernel bug.
Add WARN_ONCE to spot such cases.

Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support")
Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index ea032f54197e..3766545ce259 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -412,7 +412,7 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
 		goto out;
 
 	tls_ctx = tls_get_ctx(skb->sk);
-	if (unlikely(tls_ctx->netdev != netdev))
+	if (unlikely(WARN_ON_ONCE(tls_ctx->netdev != netdev)))
 		goto err_out;
 
 	priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
-- 
2.21.0


^ permalink raw reply related

* [net 8/9] nfp: tls: rename tls packet counters
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Jakub Kicinski, Tariq Toukan,
	Saeed Mahameed
In-Reply-To: <20190725203618.11011-1-saeedm@mellanox.com>

From: Tariq Toukan <tariqt@mellanox.com>

Align to the naming convention in TLS documentation.

Fixes: 51a5e563298d ("nfp: tls: add basic statistics")
Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index d9cbe84ac6ad..1b840ee47339 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -444,12 +444,12 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
 	data = nfp_pr_et(data, "hw_rx_csum_complete");
 	data = nfp_pr_et(data, "hw_rx_csum_err");
 	data = nfp_pr_et(data, "rx_replace_buf_alloc_fail");
-	data = nfp_pr_et(data, "rx_tls_decrypted");
+	data = nfp_pr_et(data, "rx_tls_decrypted_packets");
 	data = nfp_pr_et(data, "hw_tx_csum");
 	data = nfp_pr_et(data, "hw_tx_inner_csum");
 	data = nfp_pr_et(data, "tx_gather");
 	data = nfp_pr_et(data, "tx_lso");
-	data = nfp_pr_et(data, "tx_tls_encrypted");
+	data = nfp_pr_et(data, "tx_tls_encrypted_packets");
 	data = nfp_pr_et(data, "tx_tls_ooo");
 	data = nfp_pr_et(data, "tx_tls_drop_no_sync_data");
 
-- 
2.21.0


^ permalink raw reply related

* [net 9/9] Documentation: TLS: fix stat counters description
From: Saeed Mahameed @ 2019-07-25 20:36 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Jakub Kicinski, Tariq Toukan,
	Saeed Mahameed
In-Reply-To: <20190725203618.11011-1-saeedm@mellanox.com>

From: Tariq Toukan <tariqt@mellanox.com>

Add missing description of counters.
Split tx_tls_encrypted counter into two, to give packets
and bytes indications.

Fixes: f42c104f2ec9 ("Documentation: add TLS offload documentation")
Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 Documentation/networking/tls-offload.rst | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index 048e5ca44824..b70b70dc4524 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -424,13 +424,24 @@ Statistics
 Following minimum set of TLS-related statistics should be reported
 by the driver:
 
- * ``rx_tls_decrypted`` - number of successfully decrypted TLS segments
- * ``tx_tls_encrypted`` - number of in-order TLS segments passed to device
-   for encryption
+ * ``rx_tls_decrypted_packets`` - number of successfully decrypted RX packets
+   which were part of a TLS stream.
+ * ``rx_tls_decrypted_bytes`` - number of TLS payload bytes in RX packets
+   which were successfully decrypted.
+ * ``tx_tls_encrypted_packets`` - number of TX packets passed to the device
+   for encryption of their TLS payload.
+ * ``tx_tls_encrypted_bytes`` - number of TLS payload bytes in TX packets
+   passed to the device for encryption.
+ * ``tx_tls_ctx`` - number of TLS TX HW offload contexts added to device for
+   encryption.
  * ``tx_tls_ooo`` - number of TX packets which were part of a TLS stream
-   but did not arrive in the expected order
- * ``tx_tls_drop_no_sync_data`` - number of TX packets dropped because
-   they arrived out of order and associated record could not be found
+   but did not arrive in the expected order.
+ * ``tx_tls_drop_no_sync_data`` - number of TX packets which were part of
+   a TLS stream dropped, because they arrived out of order and associated
+   record could not be found.
+ * ``tx_tls_drop_bypass_req`` - number of TX packets which were part of a TLS
+   stream dropped, because they contain both data that has been encrypted by
+   software and data that expects hardware crypto offload.
 
 Notable corner cases, exceptions and additional requirements
 ============================================================
-- 
2.21.0


^ permalink raw reply related

* [PATCH 1/2] ipmr: Make cache queue length configurable
From: Brodie Greenfield @ 2019-07-25 20:42 UTC (permalink / raw)
  To: davem, stephen, kuznet, yoshfuji, netdev
  Cc: linux-kernel, chris.packham, luuk.paulussen, Brodie Greenfield
In-Reply-To: <20190725204230.12229-1-brodie.greenfield@alliedtelesis.co.nz>

We want to be able to keep more spaces available in our queue for
processing incoming multicast traffic (adding (S,G) entries) - this lets
us learn more groups faster, rather than dropping them at this stage.

Signed-off-by: Brodie Greenfield <brodie.greenfield@alliedtelesis.co.nz>
---
 Documentation/networking/ip-sysctl.txt | 8 ++++++++
 include/net/netns/ipv4.h               | 1 +
 net/ipv4/af_inet.c                     | 1 +
 net/ipv4/ipmr.c                        | 4 +++-
 net/ipv4/sysctl_net_ipv4.c             | 7 +++++++
 5 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index acdfb5d2bcaa..02f77e932adf 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -887,6 +887,14 @@ ip_local_reserved_ports - list of comma separated ranges
 
 	Default: Empty
 
+ip_mr_cache_queue_length - INTEGER
+	Limit the number of multicast packets we can have in the queue to be
+	resolved.
+	Bear in mind that when an unresolved multicast packet is received,
+	there is an O(n) traversal of the queue. This should be considered
+	if increasing.
+	Default: 10
+
 ip_unprivileged_port_start - INTEGER
 	This is a per-namespace sysctl.  It defines the first
 	unprivileged port in the network namespace.  Privileged ports
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 104a6669e344..3411d3f18d51 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -187,6 +187,7 @@ struct netns_ipv4 {
 	int sysctl_igmp_max_msf;
 	int sysctl_igmp_llm_reports;
 	int sysctl_igmp_qrv;
+	unsigned int sysctl_ip_mr_cache_queue_length;
 
 	struct ping_group_range ping_group_range;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0dfb72c46671..8e25538bdb1e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1827,6 +1827,7 @@ static __net_init int inet_init_net(struct net *net)
 	net->ipv4.sysctl_igmp_llm_reports = 1;
 	net->ipv4.sysctl_igmp_qrv = 2;
 
+	net->ipv4.sysctl_ip_mr_cache_queue_length = 10;
 	return 0;
 }
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ddbf8c9a1abb..c6a6c3e453a9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1127,6 +1127,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 				 struct sk_buff *skb, struct net_device *dev)
 {
 	const struct iphdr *iph = ip_hdr(skb);
+	struct net *net = dev_net(dev);
 	struct mfc_cache *c;
 	bool found = false;
 	int err;
@@ -1142,7 +1143,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 
 	if (!found) {
 		/* Create a new entry if allowable */
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
+		if (atomic_read(&mrt->cache_resolve_queue_len) >=
+		    net->ipv4.sysctl_ip_mr_cache_queue_length ||
 		    (c = ipmr_cache_alloc_unres()) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ba0fc4b18465..78ae86e8c6cb 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -784,6 +784,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 #ifdef CONFIG_IP_MULTICAST
+	{
+		.procname	= "ip_mr_cache_queue_length",
+		.data		= &init_net.ipv4.sysctl_ip_mr_cache_queue_length,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{
 		.procname	= "igmp_qrv",
 		.data		= &init_net.ipv4.sysctl_igmp_qrv,
-- 
2.21.0


^ permalink raw reply related

* [PATCH 2/2] ip6mr: Make cache queue length configurable
From: Brodie Greenfield @ 2019-07-25 20:42 UTC (permalink / raw)
  To: davem, stephen, kuznet, yoshfuji, netdev
  Cc: linux-kernel, chris.packham, luuk.paulussen, Brodie Greenfield
In-Reply-To: <20190725204230.12229-1-brodie.greenfield@alliedtelesis.co.nz>

We want to be able to keep more spaces available in our queue for
processing incoming IPv6 multicast traffic (adding (S,G) entries) - this
lets us learn more groups faster, rather than dropping them at this stage.

Signed-off-by: Brodie Greenfield <brodie.greenfield@alliedtelesis.co.nz>
---
 Documentation/networking/ip-sysctl.txt | 8 ++++++++
 include/net/netns/ipv6.h               | 1 +
 net/ipv6/af_inet6.c                    | 1 +
 net/ipv6/ip6mr.c                       | 4 +++-
 net/ipv6/sysctl_net_ipv6.c             | 7 +++++++
 5 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 02f77e932adf..68eada3ca915 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1481,6 +1481,14 @@ skip_notify_on_dev_down - BOOLEAN
 	on userspace caches to track link events and evict routes.
 	Default: false (generate message)
 
+ip6_mr_cache_queue_length - INTEGER
+	Limit the number of multicast packets we can have in the queue to be
+	resolved.
+	Bear in mind that when an unresolved multicast packet is received,
+	there is an O(n) traversal of the queue. This should be considered
+	if increasing.
+	Default: 10
+
 IPv6 Fragmentation:
 
 ip6frag_high_thresh - INTEGER
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index ef1ed529f33c..84b58424c799 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -46,6 +46,7 @@ struct netns_sysctl_ipv6 {
 	int max_hbh_opts_len;
 	int seg6_flowlabel;
 	bool skip_notify_on_dev_down;
+	unsigned int ip6_mr_cache_queue_length;
 };
 
 struct netns_ipv6 {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d99753b5e39b..6551bb63e5a2 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -856,6 +856,7 @@ static int __net_init inet6_net_init(struct net *net)
 	net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
 	net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
 	net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
+	net->ipv6.sysctl.ip6_mr_cache_queue_length = 10;
 	atomic_set(&net->ipv6.fib6_sernum, 1);
 
 	err = ipv6_init_mibs(net);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index cc01aa3f2b5e..bb445871437e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1135,6 +1135,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 				  struct sk_buff *skb, struct net_device *dev)
 {
+	struct net *net = dev_net(dev);
 	struct mfc6_cache *c;
 	bool found = false;
 	int err;
@@ -1153,7 +1154,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
+		if (atomic_read(&mrt->cache_resolve_queue_len) >=
+		    net->ipv6.sysctl.ip6_mr_cache_queue_length ||
 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index e15cd37024fd..a27299d4cc34 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -159,6 +159,13 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "ip6_mr_cache_queue_length",
+		.data		= &init_net.ipv6.sysctl.ip6_mr_cache_queue_length,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{ }
 };
 
-- 
2.21.0


^ permalink raw reply related

* [PATCH 0/2] Make ipmr queue length configurable
From: Brodie Greenfield @ 2019-07-25 20:42 UTC (permalink / raw)
  To: davem, stephen, kuznet, yoshfuji, netdev
  Cc: linux-kernel, chris.packham, luuk.paulussen, Brodie Greenfield

We want to have some more space in our queue for processing incoming
multicast packets, so we can process more of them without dropping
them prematurely. It is useful to be able to increase this limit on
higher-spec platforms that can handle more items.

For the particular use case here at Allied Telesis, we have linux
running on our switches and routers, with support for the number of
multicast groups being increased. Basically, this queue length affects
the time taken to fully learn all of the multicast streams. 

Changes in v3:
 - Corrected a v4 to v6 typo.

Changes in v2:
 - Tidy up a few unnecessary bits of code.
 - Put the sysctl inside ip multicast ifdef.
 - Included the IPv6 version.

Brodie Greenfield (2):
  ipmr: Make cache queue length configurable
  ip6mr: Make cache queue length configurable

 Documentation/networking/ip-sysctl.txt | 16 ++++++++++++++++
 include/net/netns/ipv4.h               |  1 +
 include/net/netns/ipv6.h               |  1 +
 net/ipv4/af_inet.c                     |  1 +
 net/ipv4/ipmr.c                        |  4 +++-
 net/ipv4/sysctl_net_ipv4.c             |  7 +++++++
 net/ipv6/af_inet6.c                    |  1 +
 net/ipv6/ip6mr.c                       |  4 +++-
 net/ipv6/sysctl_net_ipv6.c             |  7 +++++++
 9 files changed, 40 insertions(+), 2 deletions(-)

-- 
2.21.0


^ permalink raw reply

* Re: [net 7/9] net/mlx5e: kTLS, Call WARN_ONCE on netdev mismatch
From: Jakub Kicinski @ 2019-07-25 20:48 UTC (permalink / raw)
  To: Saeed Mahameed; +Cc: David S. Miller, netdev@vger.kernel.org, Tariq Toukan
In-Reply-To: <20190725203618.11011-8-saeedm@mellanox.com>

On Thu, 25 Jul 2019 20:36:48 +0000, Saeed Mahameed wrote:
> From: Tariq Toukan <tariqt@mellanox.com>
> 
> A netdev mismatch in the processed TLS SKB should not occur,
> and indicates a kernel bug.
> Add WARN_ONCE to spot such cases.
> 
> Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support")
> Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>

^ permalink raw reply

* Re: [net 7/9] net/mlx5e: kTLS, Call WARN_ONCE on netdev mismatch
From: Jakub Kicinski @ 2019-07-25 20:49 UTC (permalink / raw)
  To: Saeed Mahameed; +Cc: David S. Miller, netdev@vger.kernel.org, Tariq Toukan
In-Reply-To: <20190725203618.11011-8-saeedm@mellanox.com>

On Thu, 25 Jul 2019 20:36:48 +0000, Saeed Mahameed wrote:
> From: Tariq Toukan <tariqt@mellanox.com>
> 
> A netdev mismatch in the processed TLS SKB should not occur,
> and indicates a kernel bug.
> Add WARN_ONCE to spot such cases.
> 
> Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support")
> Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
> index ea032f54197e..3766545ce259 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
> @@ -412,7 +412,7 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
>  		goto out;
>  
>  	tls_ctx = tls_get_ctx(skb->sk);
> -	if (unlikely(tls_ctx->netdev != netdev))
> +	if (unlikely(WARN_ON_ONCE(tls_ctx->netdev != netdev)))

Ah, nit: the unlikely is probably unnecessary but that's no big deal.

#define WARN_ON_ONCE(condition) ({			\
	static int __warned;				\
	int __ret_warn_once = !!(condition);		\
							\
	if (unlikely(__ret_warn_once && !__warned)) {	\
		__warned = true;			\
		WARN_ON(1);				\
	}						\
	unlikely(__ret_warn_once);			\
})

>  		goto err_out;
>  
>  	priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox