* RE: [PATCH V4] can: flexcan: implement can Runtime PM
From: Joakim Zhang @ 2019-02-14 9:57 UTC (permalink / raw)
To: mkl@pengutronix.de, linux-can@vger.kernel.org
Cc: wg@grandegger.com, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, dl-linux-imx, Aisheng Dong
In-Reply-To: <DB7PR04MB4618BFE57BD34A8B7A708E0FE6830@DB7PR04MB4618.eurprd04.prod.outlook.com>
Kindly Ping...
Best Regards,
Joakim Zhang
> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年1月17日 14:23
> To: mkl@pengutronix.de; linux-can@vger.kernel.org
> Cc: wg@grandegger.com; netdev@vger.kernel.org;
> linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>; Aisheng
> Dong <aisheng.dong@nxp.com>
> Subject: RE: [PATCH V4] can: flexcan: implement can Runtime PM
>
>
> Kindly Ping...
>
> Best Regards,
> Joakim Zhang
>
> > -----Original Message-----
> > From: Joakim Zhang
> > Sent: 2018年11月30日 16:53
> > To: mkl@pengutronix.de; linux-can@vger.kernel.org
> > Cc: wg@grandegger.com; netdev@vger.kernel.org;
> > linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>;
> > Aisheng DONG <aisheng.dong@nxp.com>; Joakim Zhang
> > <qiangqing.zhang@nxp.com>
> > Subject: [PATCH V4] can: flexcan: implement can Runtime PM
> >
> > From: Aisheng Dong <aisheng.dong@nxp.com>
> >
> > Flexcan will be disabled during suspend if no wakeup function required
> > and enabled after resume accordingly. During this period, we could
> > explicitly disable clocks.
> > > Since PM is optional, the clock is enabled at probe to guarantee the
> > clock is running when PM is not enabled in the kernel.
> >
> > Implement Runtime PM which will:
> > 1) Without CONFIG_PM, clock is running whether Flexcan up or down.
> > 2) With CONFIG_PM, clock enabled while Flexcan up and disabled when
> > Flexcan down.
> > 3) Disable clock when do system suspend and enable clock while system
> > resume.
> > 4) Make Power Domain framework be able to shutdown the corresponding
> > power domain of this device.
> >
> > Signed-off-by: Aisheng Dong <aisheng.dong@nxp.com>
> > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> > ---
> > ChangeLog:
> > V1->V2:
> > *rebased on patch "can: flexcan: add self wakeup support".
> > V2->V3:
> > *fix device fails to probe without CONFIG_PM.
> > V3->V4:
> > *runtime pm enable should ahead of registering device.
> > *disable device even if keeping the clocks on.
> > ---
> > drivers/net/can/flexcan.c | 111
> > +++++++++++++++++++++++++-------------
> > 1 file changed, 73 insertions(+), 38 deletions(-)
> >
> > diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
> > index
> > 0f36eafe3ac1..cad42f20cfe5 100644
> > --- a/drivers/net/can/flexcan.c
> > +++ b/drivers/net/can/flexcan.c
> > @@ -24,6 +24,7 @@
> > #include <linux/of.h>
> > #include <linux/of_device.h>
> > #include <linux/platform_device.h>
> > +#include <linux/pm_runtime.h>
> > > #include <linux/regulator/consumer.h>
> > > #include <linux/regmap.h>
> >
> > @@ -277,6 +278,7 @@ struct flexcan_priv {
> > u32 reg_imask1_default;
> > u32 reg_imask2_default;
> >
> > + struct device *dev;
> > struct clk *clk_ipg;
> > struct clk *clk_per;
> > const struct flexcan_devtype_data *devtype_data; @@ -444,6 +446,27
> > @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv
> *priv)
> > priv->write(reg_ctrl, &regs->ctrl);
> > }
> >
> > +static int flexcan_clks_enable(const struct flexcan_priv *priv) {
> > + int err;
> > +
> > + err = clk_prepare_enable(priv->clk_ipg);
> > + if (err)
> > + return err;
> > +
> > + err = clk_prepare_enable(priv->clk_per);
> > + if (err)
> > + clk_disable_unprepare(priv->clk_ipg);
> > +
> > + return err;
> > +}
> > +
> > +static void flexcan_clks_disable(const struct flexcan_priv *priv) {
> > + clk_disable_unprepare(priv->clk_ipg);
> > + clk_disable_unprepare(priv->clk_per);
> > +}
> > +
> > static inline int flexcan_transceiver_enable(const struct flexcan_priv *priv)
> {
> > if (!priv->reg_xceiver)
> > @@ -570,19 +593,13 @@ static int flexcan_get_berr_counter(const struct
> > net_device *dev,
> > const struct flexcan_priv *priv = netdev_priv(dev);
> > int err;
> >
> > - err = clk_prepare_enable(priv->clk_ipg);
> > - if (err)
> > + err = pm_runtime_get_sync(priv->dev);
> > + if (err < 0)
> > return err;
> >
> > - err = clk_prepare_enable(priv->clk_per);
> > - if (err)
> > - goto out_disable_ipg;
> > -
> > err = __flexcan_get_berr_counter(dev, bec);
> >
> > - clk_disable_unprepare(priv->clk_per);
> > - out_disable_ipg:
> > - clk_disable_unprepare(priv->clk_ipg);
> > + pm_runtime_put(priv->dev);
> >
> > return err;
> > }
> > @@ -1215,17 +1232,13 @@ static int flexcan_open(struct net_device *dev)
> > struct flexcan_priv *priv = netdev_priv(dev);
> > int err;
> >
> > - err = clk_prepare_enable(priv->clk_ipg);
> > - if (err)
> > + err = pm_runtime_get_sync(priv->dev);
> > + if (err < 0)
> > return err;
> >
> > - err = clk_prepare_enable(priv->clk_per);
> > - if (err)
> > - goto out_disable_ipg;
> > -
> > err = open_candev(dev);
> > if (err)
> > - goto out_disable_per;
> > + goto out_disable_clks;
> >
> > err = request_irq(dev->irq, flexcan_irq, IRQF_SHARED, dev->name, dev);
> > if (err)
> > @@ -1288,10 +1301,8 @@ static int flexcan_open(struct net_device *dev)
> > free_irq(dev->irq, dev);
> > out_close:
> > close_candev(dev);
> > - out_disable_per:
> > - clk_disable_unprepare(priv->clk_per);
> > - out_disable_ipg:
> > - clk_disable_unprepare(priv->clk_ipg);
> > + out_disable_clks:
> > + pm_runtime_put(priv->dev);
> >
> > return err;
> > }
> > @@ -1306,10 +1317,9 @@ static int flexcan_close(struct net_device
> > *dev)
> >
> > can_rx_offload_del(&priv->offload);
> > free_irq(dev->irq, dev);
> > - clk_disable_unprepare(priv->clk_per);
> > - clk_disable_unprepare(priv->clk_ipg);
> >
> > close_candev(dev);
> > + pm_runtime_put(priv->dev);
> >
> > can_led_event(dev, CAN_LED_EVENT_STOP);
> >
> > @@ -1349,18 +1359,14 @@ static int register_flexcandev(struct
> > net_device
> > *dev)
> > struct flexcan_regs __iomem *regs = priv->regs;
> > u32 reg, err;
> >
> > - err = clk_prepare_enable(priv->clk_ipg);
> > + err = flexcan_clks_enable(priv);
> > if (err)
> > return err;
> >
> > - err = clk_prepare_enable(priv->clk_per);
> > - if (err)
> > - goto out_disable_ipg;
> > -
> > /* select "bus clock", chip must be disabled */
> > err = flexcan_chip_disable(priv);
> > if (err)
> > - goto out_disable_per;
> > + goto out_disable_clks;
> > > reg = priv->read(&regs->ctrl);
> > > reg |= FLEXCAN_CTRL_CLK_SRC;
> > > priv->write(reg, &regs->ctrl);
> > @@ -1389,14 +1395,13 @@ static int register_flexcandev(struct
> > net_device
> > *dev)
> >
> > err = register_candev(dev);
> >
> > - /* disable core and turn off clocks */
> > - out_chip_disable:
> > flexcan_chip_disable(priv);
> > - out_disable_per:
> > - clk_disable_unprepare(priv->clk_per);
> > - out_disable_ipg:
> > - clk_disable_unprepare(priv->clk_ipg);
> > + return 0;
> >
> > + out_chip_disable:
> > + flexcan_chip_disable(priv);
> > + out_disable_clks:
> > + flexcan_clks_disable(priv);
> > return err;
> > }
> >
> > @@ -1556,6 +1561,7 @@ static int flexcan_probe(struct platform_device
> > *pdev)
> > priv->write = flexcan_write_le;
> > }
> >
> > + priv->dev = &pdev->dev;
> > priv->can.clock.freq = clock_freq;
> > priv->can.bittiming_const = &flexcan_bittiming_const;
> > priv->can.do_set_mode = flexcan_set_mode; @@ -1569,6 +1575,10 @@
> > static int flexcan_probe(struct platform_device *pdev)
> > priv->devtype_data = devtype_data;
> > priv->reg_xceiver = reg_xceiver;
> >
> > + pm_runtime_get_noresume(&pdev->dev);
> > + pm_runtime_set_active(&pdev->dev);
> > + pm_runtime_enable(&pdev->dev);
> > +
> > err = register_flexcandev(dev);
> > if (err) {
> > dev_err(&pdev->dev, "registering netdev failed\n"); @@ -1586,6
> > +1596,7 @@ static int flexcan_probe(struct platform_device *pdev)
> > dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%d)\n",
> > priv->regs, dev->irq);
> >
> > + pm_runtime_put(&pdev->dev);
> > return 0;
> >
> > failed_register:
> > @@ -1598,6 +1609,7 @@ static int flexcan_remove(struct platform_device
> > *pdev)
> > struct net_device *dev = platform_get_drvdata(pdev);
> >
> > unregister_flexcandev(dev);
> > + pm_runtime_disable(&pdev->dev);
> > free_candev(dev);
> >
> > return 0;
> > @@ -1607,7 +1619,7 @@ static int __maybe_unused
> flexcan_suspend(struct
> > device *device) {
> > struct net_device *dev = dev_get_drvdata(device);
> > struct flexcan_priv *priv = netdev_priv(dev);
> > - int err;
> > + int err = 0;
> >
> > if (netif_running(dev)) {
> > /* if wakeup is enabled, enter stop mode @@ -1620,20 +1632,22
> @@
> > static int __maybe_unused flexcan_suspend(struct device *device)
> > err = flexcan_chip_disable(priv);
> > if (err)
> > return err;
> > +
> > + err = pm_runtime_force_suspend(device);
> > }
> > netif_stop_queue(dev);
> > netif_device_detach(dev);
> > }
> > priv->can.state = CAN_STATE_SLEEPING;
> >
> > - return 0;
> > + return err;
> > }
> >
> > static int __maybe_unused flexcan_resume(struct device *device) {
> > struct net_device *dev = dev_get_drvdata(device);
> > struct flexcan_priv *priv = netdev_priv(dev);
> > - int err;
> > + int err = 0;
> >
> > priv->can.state = CAN_STATE_ERROR_ACTIVE;
> > if (netif_running(dev)) {
> > @@ -1642,14 +1656,34 @@ static int __maybe_unused
> > flexcan_resume(struct device *device)
> > if (device_may_wakeup(device)) {
> > disable_irq_wake(dev->irq);
> > } else {
> > - err = flexcan_chip_enable(priv);
> > + err = pm_runtime_force_resume(device);
> > if (err)
> > return err;
> > +
> > + err = flexcan_chip_enable(priv);
> > }
> > }
> > + return err;
> > +}
> > +
> > +static int __maybe_unused flexcan_runtime_suspend(struct device
> > +*device) {
> > + struct net_device *dev = dev_get_drvdata(device);
> > + struct flexcan_priv *priv = netdev_priv(dev);
> > +
> > + flexcan_clks_disable(priv);
> > +
> > return 0;
> > }
> >
> > +static int __maybe_unused flexcan_runtime_resume(struct device
> > +*device) {
> > + struct net_device *dev = dev_get_drvdata(device);
> > + struct flexcan_priv *priv = netdev_priv(dev);
> > +
> > + return flexcan_clks_enable(priv);
> > +}
> > +
> > static int __maybe_unused flexcan_noirq_suspend(struct device *device)
> {
> > struct net_device *dev = dev_get_drvdata(device); @@ -1676,6 +1710,7
> > @@ static int __maybe_unused flexcan_noirq_resume(struct device
> > *device)
> >
> > static const struct dev_pm_ops flexcan_pm_ops = {
> > SET_SYSTEM_SLEEP_PM_OPS(flexcan_suspend, flexcan_resume)
> > + SET_RUNTIME_PM_OPS(flexcan_runtime_suspend,
> > flexcan_runtime_resume,
> > +NULL)
> > SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(flexcan_noirq_suspend,
> > flexcan_noirq_resume) };
> >
> > --
> > 2.17.1
^ permalink raw reply
* RE: [PATCH] can: flexcan: add TX support for variable payload size
From: Joakim Zhang @ 2019-02-14 9:57 UTC (permalink / raw)
To: mkl@pengutronix.de, linux-can@vger.kernel.org
Cc: wg@grandegger.com, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, dl-linux-imx
In-Reply-To: <DB7PR04MB46185CFC0AE2641366B0A51EE6830@DB7PR04MB4618.eurprd04.prod.outlook.com>
Kindly Ping...
Best Regards,
Joakim Zhang
> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年1月17日 14:26
> To: mkl@pengutronix.de; linux-can@vger.kernel.org
> Cc: wg@grandegger.com; netdev@vger.kernel.org;
> linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>
> Subject: RE: [PATCH] can: flexcan: add TX support for variable payload size
>
>
> Kindly Ping...
>
> Best Regards,
> Joakim Zhang
>
> > -----Original Message-----
> > From: Joakim Zhang
> > Sent: 2018年12月12日 14:47
> > To: mkl@pengutronix.de; linux-can@vger.kernel.org
> > Cc: wg@grandegger.com; netdev@vger.kernel.org;
> > linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>; Joakim
> > Zhang <qiangqing.zhang@nxp.com>
> > Subject: [PATCH] can: flexcan: add TX support for variable payload
> > size
> >
> > Now the FlexCAN driver always use last mailbox for TX, it will work
> > well when MB payload size is 8/16 bytes.
> > TX mailbox would change to 13 when MB payload size is 64 bytes to
> > support CANFD. So we may need to set iflag register to add support for
> > variable payload size.
> >
> > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> > ---
> > drivers/net/can/flexcan.c | 42
> > +++++++++++++++++++++++++++++----------
> > 1 file changed, 32 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
> > index
> > 0f36eafe3ac1..13fd085fcf84 100644
> > --- a/drivers/net/can/flexcan.c
> > +++ b/drivers/net/can/flexcan.c
> > @@ -141,7 +141,9 @@
> > #define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8
> > #define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP 0
> > #define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST
> > (FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP + 1)
> > -#define FLEXCAN_IFLAG_MB(x) BIT((x) & 0x1f)
> > +#define FLEXCAN_IFLAG1_MB_NUM 32
> > +#define FLEXCAN_IFLAG1_MB(x) BIT(x)
> > +#define FLEXCAN_IFLAG2_MB(x) BIT((x) & 0x1f)
> > #define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW BIT(7)
> > #define FLEXCAN_IFLAG_RX_FIFO_WARN BIT(6)
> > #define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE BIT(5)
> > @@ -822,9 +824,15 @@ static inline u64
> > flexcan_read_reg_iflag_rx(struct flexcan_priv *priv)
> > struct flexcan_regs __iomem *regs = priv->regs;
> > u32 iflag1, iflag2;
> >
> > > - iflag2 = priv->read(&regs->iflag2) & priv->reg_imask2_default &
> > > - ~FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
> > > - iflag1 = priv->read(&regs->iflag1) & priv->reg_imask1_default;
> > > + if (priv->tx_mb_idx >= FLEXCAN_IFLAG1_MB_NUM) {
> > > + iflag2 = priv->read(&regs->iflag2) & priv->reg_imask2_default &
> > > + ~FLEXCAN_IFLAG2_MB(priv->tx_mb_idx);
> > > + iflag1 = priv->read(&regs->iflag1) & priv->reg_imask1_default;
> > > + } else {
> > > + iflag2 = priv->read(&regs->iflag2) & priv->reg_imask2_default;
> > > + iflag1 = priv->read(&regs->iflag1) & priv->reg_imask1_default &
> > + ~FLEXCAN_IFLAG1_MB(priv->tx_mb_idx);
> > + }
> >
> > return (u64)iflag2 << 32 | iflag1;
> > }
> > @@ -836,7 +844,8 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
> > struct flexcan_priv *priv = netdev_priv(dev);
> > struct flexcan_regs __iomem *regs = priv->regs;
> > irqreturn_t handled = IRQ_NONE;
> > - u32 reg_iflag2, reg_esr;
> > + u32 reg_tx_iflag, tx_iflag_idx, reg_esr;
> > + void __iomem *reg_iflag;
> > enum can_state last_state = priv->can.state;
> >
> > /* reception interrupt */
> > @@ -870,10 +879,18 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
> > }
> > }
> >
> > > - reg_iflag2 = priv->read(&regs->iflag2);
> > > + if (priv->tx_mb_idx >= FLEXCAN_IFLAG1_MB_NUM) {
> > > + reg_tx_iflag = priv->read(&regs->iflag2);
> > > + tx_iflag_idx = FLEXCAN_IFLAG2_MB(priv->tx_mb_idx);
> > > + reg_iflag = &regs->iflag2;
> > > + } else {
> > > + reg_tx_iflag = priv->read(&regs->iflag1);
> > > + tx_iflag_idx = FLEXCAN_IFLAG1_MB(priv->tx_mb_idx);
> > > + reg_iflag = &regs->iflag1;
> > + }
> >
> > /* transmission complete interrupt */
> > - if (reg_iflag2 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) {
> > + if (reg_tx_iflag & tx_iflag_idx) {
> > u32 reg_ctrl = priv->read(&priv->tx_mb->can_ctrl);
> >
> > handled = IRQ_HANDLED;
> > @@ -885,7 +902,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
> > /* after sending a RTR frame MB is in RX mode */
> > priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
> > &priv->tx_mb->can_ctrl);
> > > - priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), &regs->iflag2);
> > + priv->write(tx_iflag_idx, reg_iflag);
> > netif_wake_queue(dev);
> > }
> >
> > @@ -1244,8 +1261,13 @@ static int flexcan_open(struct net_device *dev)
> > priv->tx_mb_idx = priv->mb_count - 1;
> > priv->tx_mb = flexcan_get_mb(priv, priv->tx_mb_idx);
> >
> > - priv->reg_imask1_default = 0;
> > - priv->reg_imask2_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
> > + if (priv->tx_mb_idx >= FLEXCAN_IFLAG1_MB_NUM) {
> > + priv->reg_imask1_default = 0;
> > + priv->reg_imask2_default = FLEXCAN_IFLAG2_MB(priv->tx_mb_idx);
> > + } else {
> > + priv->reg_imask1_default = FLEXCAN_IFLAG1_MB(priv->tx_mb_idx);
> > + priv->reg_imask2_default = 0;
> > + }
> >
> > priv->offload.mailbox_read = flexcan_mailbox_read;
> >
> > --
> > 2.17.1
^ permalink raw reply
* RE: [PATCH 0/3] can: flexcan: add imx8qm support
From: Joakim Zhang @ 2019-02-14 9:58 UTC (permalink / raw)
To: mkl@pengutronix.de, linux-can@vger.kernel.org
Cc: wg@grandegger.com, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, dl-linux-imx
In-Reply-To: <DB7PR04MB46187B4AABF3607876DA30D4E6830@DB7PR04MB4618.eurprd04.prod.outlook.com>
Kindly Ping...
Best Regards,
Joakim Zhang
> -----Original Message-----
> From: Joakim Zhang
> Sent: 2019年1月17日 14:28
> To: mkl@pengutronix.de; linux-can@vger.kernel.org
> Cc: wg@grandegger.com; netdev@vger.kernel.org;
> linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>
> Subject: RE: [PATCH 0/3] can: flexcan: add imx8qm support
>
>
> Kindly Ping...
>
> Best Regards,
> Joakim Zhang
>
> > -----Original Message-----
> > From: Joakim Zhang
> > Sent: 2018年12月19日 16:39
> > To: mkl@pengutronix.de; linux-can@vger.kernel.org
> > Cc: wg@grandegger.com; netdev@vger.kernel.org;
> > linux-kernel@vger.kernel.org; dl-linux-imx <linux-imx@nxp.com>; Joakim
> > Zhang <qiangqing.zhang@nxp.com>
> > Subject: [PATCH 0/3] can: flexcan: add imx8qm support
> >
> > > This patch set intends to add Flexcan support for the i.MX8QM platform,
> > > which supports the CAN FD protocol by default. Although BRS is enabled by
> > > system reset when the driver sets CAN FD mode, this patch series
> > > does not yet support BRS in the driver. I will then send a patch to
> > > enable BRS by changing the register used to set bit timing.
> >
> > Dong Aisheng (3):
> > can: rx-offload: add CANFD support based on offload
> > can: flexcan: add CAN FD mode support
> > can: flexcan: add imx8qm support
> >
> > drivers/net/can/flexcan.c | 114
> ++++++++++++++++++++++++++++++---
> > drivers/net/can/rx-offload.c | 16 +++--
> > include/linux/can/rx-offload.h | 4 +-
> > 3 files changed, 117 insertions(+), 17 deletions(-)
> >
> > --
> > 2.17.1
^ permalink raw reply
* Re: [PATCH] net: phy: at803x: disable delay only for RGMII mode
From: Peter Ujfalusi @ 2019-02-14 10:49 UTC (permalink / raw)
To: Niklas Cassel, Marc Gonzalez
Cc: Andrew Lunn, Florian Fainelli, Vinod Koul, David S Miller,
linux-arm-msm, Bjorn Andersson, netdev, Nori, Sekhar
In-Reply-To: <20190213174034.GA6954@centauri.lan>
Hi Niklas,
On 13/02/2019 19.40, Niklas Cassel wrote:
> On Wed, Feb 13, 2019 at 02:40:18PM +0100, Marc Gonzalez wrote:
>> On 13/02/2019 14:29, Andrew Lunn wrote:
>>
>>>> So we have these modes:
>>>>
>>>> PHY_INTERFACE_MODE_RGMII: TX and RX delays disabled
>>>> PHY_INTERFACE_MODE_RGMII_ID: TX and RX delays enabled
>>>> PHY_INTERFACE_MODE_RGMII_RXID: RX delay enabled, TX delay disabled
>>>> PHY_INTERFACE_MODE_RGMII_TXID: TX delay enabled, RX delay disabled
>>>>
>>>> What I don't like with this patch, is that if we specify phy-mode
>>>> PHY_INTERFACE_MODE_RGMII_TXID, this patch will enable TX delay,
>>>> but RX delay will not be explicitly set.
>>>
>>> That is not the behaviour we want. It is best to assume the device is
>>> in a random state, and correctly enable/disable all delays as
>>> requested. Only leave the hardware alone if PHY_INTERFACE_MODE_NA is
>>> used.
>>
>> That's what my patch did:
>> https://www.spinics.net/lists/netdev/msg445053.html
>>
>> But see Florian's remarks:
>> https://www.spinics.net/lists/netdev/msg445133.html
>
> Hello Marc,
>
> I saw that comment from Florian. However that was way back in 2017.
> Maybe the phy-modes were not as well defined back then?
>
> Andrew recently suggested to fix the driver so that it conforms with the
> phy-modes, and fix any SoC that specified an incorrect phy-mode in DT
> and thus relied upon the broken behavior of the PHY driver:
> https://www.spinics.net/lists/netdev/msg445133.html
>
>
> So, I've rebased your old patch, see attachment.
> I suggest that Peter test it on am335x-evm.
with the patch + s/rgmii-txid/rgmii-id in the am335x-evmsk.dts ethernet
is working.
I don't have am335x-evm to test, but it has the same PHY as evmsk.
> am335x-evm appears to rely on the current broken behavior of the PHY
> driver, so we will probably need to fix the am335x-evm according to this:
> https://www.spinics.net/lists/netdev/msg445117.html
> and merge that as well.
>
>
> Andrew, Florian, do you both agree?
>
>
> Kind regards,
> Niklas
>
- Péter
Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
^ permalink raw reply
* Re: [PATCH net-next 1/9] Documentation: networking: switchdev: Update port parent ID section
From: Ido Schimmel @ 2019-02-14 10:58 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190213220638.1552-2-f.fainelli@gmail.com>
On Wed, Feb 13, 2019 at 02:06:30PM -0800, Florian Fainelli wrote:
> Update the section about switchdev drivers having to implement a
> switchdev_port_attr_get() function to return
> SWITCHDEV_ATTR_ID_PORT_PARENT_ID since that is no longer valid after
> commit bccb30254a4a ("net: Get rid of
> SWITCHDEV_ATTR_ID_PORT_PARENT_ID").
>
> Fixes: bccb30254a4a ("net: Get rid of SWITCHDEV_ATTR_ID_PORT_PARENT_ID")
> Acked-by: Jiri Pirko <jiri@mellanox.com>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
^ permalink raw reply
* Re: [PATCH net-next] net: ip6_gre: Give ERSPAN a fill_info link op of its own
From: Lorenzo Bianconi @ 2019-02-14 11:10 UTC (permalink / raw)
To: Petr Machata
Cc: netdev@vger.kernel.org, davem@davemloft.net, kuznet@ms2.inr.ac.ru,
yoshfuji@linux-ipv6.org, lucien.xin
In-Reply-To: <c14a9085e87ca9e36ba7f5feea46e5750a5baeeb.1550086179.git.petrm@mellanox.com>
> In commit c706863bc890 ("net: ip6_gre: always reports o_key to
> userspace"), ip6gre and ip6gretap tunnels started reporting a TUNNEL_KEY
> output flag even if one was not configured at the device.
>
> When an okey-less ip6gre or ip6gretap netdevice is created, it initially
> encapsulates the packets without okey. But any configuration change
> (even a non-change such as setting TOS to an already-configured value)
> then causes the okey flag from the reported configuration to be
> circulated back to actual configuration. From that point on, the device
> encapsulates packets with output key of 0.
>
> The intention was to implement this behavior for ERSPAN devices, not for
> all ip6gre devices. The ERSPAN netdevice should really have its own
> fill_info callback. Add one.
Hi Petr,
I was assuming erspan_ver is set just for erspan tunnels. In particular I guess
the issue is due to the default erspan_ver configuration done in
ip6gre_netlink_parms (commit 84581bdae9587).
What about adding a routine to set erspan_ver and moving it in
ip6erspan_newlink/ip6erspan_changelink? In this way erspan_ver will be
defined just for erspan tunnels.
Moreover do we have a similar issue for IFLA_GRE_ERSPAN_INDEX in
ip6gre_fill_info?
Something like:
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 65a4f96dc462..bb525abd860e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1719,6 +1719,24 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
}
+static void ip6erspan_set_version(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ parms->erspan_ver = 1;
+ if (data[IFLA_GRE_ERSPAN_VER])
+ parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+ if (parms->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX])
+ parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ } else if (parms->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR])
+ parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (data[IFLA_GRE_ERSPAN_HWID])
+ parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ }
+}
+
static void ip6gre_netlink_parms(struct nlattr *data[],
struct __ip6_tnl_parm *parms)
{
@@ -1767,20 +1785,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_COLLECT_METADATA])
parms->collect_md = true;
-
- parms->erspan_ver = 1;
- if (data[IFLA_GRE_ERSPAN_VER])
- parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
-
- if (parms->erspan_ver == 1) {
- if (data[IFLA_GRE_ERSPAN_INDEX])
- parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
- } else if (parms->erspan_ver == 2) {
- if (data[IFLA_GRE_ERSPAN_DIR])
- parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
- if (data[IFLA_GRE_ERSPAN_HWID])
- parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
- }
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -2203,6 +2207,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
int err;
ip6gre_netlink_parms(data, &nt->parms);
+ ip6erspan_set_version(data, &nt->parms);
ign = net_generic(net, ip6gre_net_id);
if (nt->parms.collect_md) {
@@ -2248,6 +2253,7 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
if (IS_ERR(t))
return PTR_ERR(t);
+ ip6erspan_set_version(data, &p);
ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
Does it fix reported issue?
Regards,
Lorenzo
>
> Fixes: c706863bc890 ("net: ip6_gre: always reports o_key to userspace")
> CC: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> Signed-off-by: Petr Machata <petrm@mellanox.com>
> ---
> net/ipv6/ip6_gre.c | 31 ++++++++++++++++++++++++-------
> 1 file changed, 24 insertions(+), 7 deletions(-)
>
> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
> index 65a4f96dc462..0a6087cffe54 100644
> --- a/net/ipv6/ip6_gre.c
> +++ b/net/ipv6/ip6_gre.c
> @@ -2094,15 +2094,13 @@ static size_t ip6gre_get_size(const struct net_device *dev)
> 0;
> }
>
> -static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
> +static int __ip6gre_fill_info(struct sk_buff *skb,
> + const struct net_device *dev,
> + __be16 base_o_flags)
> {
> struct ip6_tnl *t = netdev_priv(dev);
> struct __ip6_tnl_parm *p = &t->parms;
> - __be16 o_flags = p->o_flags;
> -
> - if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
> - !p->collect_md)
> - o_flags |= TUNNEL_KEY;
> + __be16 o_flags = p->o_flags | base_o_flags;
>
> if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
> nla_put_be16(skb, IFLA_GRE_IFLAGS,
> @@ -2155,6 +2153,11 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
> return -EMSGSIZE;
> }
>
> +static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
> +{
> + return __ip6gre_fill_info(skb, dev, 0);
> +}
> +
> static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
> [IFLA_GRE_LINK] = { .type = NLA_U32 },
> [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
> @@ -2256,6 +2259,20 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
> return 0;
> }
>
> +static int ip6erspan_fill_info(struct sk_buff *skb,
> + const struct net_device *dev)
> +{
> + struct ip6_tnl *t = netdev_priv(dev);
> + struct __ip6_tnl_parm *p = &t->parms;
> + __be16 base_o_flags = 0;
> +
> + if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
> + !p->collect_md)
> + base_o_flags |= TUNNEL_KEY;
> +
> + return __ip6gre_fill_info(skb, dev, base_o_flags);
> +}
> +
> static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
> .kind = "ip6gre",
> .maxtype = IFLA_GRE_MAX,
> @@ -2295,7 +2312,7 @@ static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
> .newlink = ip6erspan_newlink,
> .changelink = ip6erspan_changelink,
> .get_size = ip6gre_get_size,
> - .fill_info = ip6gre_fill_info,
> + .fill_info = ip6erspan_fill_info,
> .get_link_net = ip6_tnl_get_link_net,
> };
>
> --
> 2.4.11
>
^ permalink raw reply related
* Re: [RFC PATCH] net act_vlan: use correct len in skb_pull
From: Zahari Doychev @ 2019-02-14 11:54 UTC (permalink / raw)
To: Toshiaki Makita
Cc: netdev, bridge, nikolay, roopa, jhs, jiri, xiyou.wangcong,
johannes
In-Reply-To: <19f5bdf9-c83b-c9ec-83c3-1e9a88763b30@lab.ntt.co.jp>
On Thu, Feb 14, 2019 at 06:16:12PM +0900, Toshiaki Makita wrote:
> On 2019/02/14 4:51, Zahari Doychev wrote:
> > The bridge and VLAN code expects that skb->data points to the start of the
> > VLAN header instead of the next (network) header. Currently after
> > tcf_vlan_act() on ingress filter skb->data points to the next network
> > header. In this case the Linux bridge does not forward correctly double
> > tagged VLAN packets added using tc vlan action as the outer vlan tag from
> > the skb is inserted at the wrong offset after the vlan tag in the payload.
> > Making skb->data to point to the VLAN header in tcf_vlan_act() by using
> > ETH_HLEN in skb_pull_rcsum() fixes the problem.
> >
> > The following commands were used for testing:
> >
> > ip link add name br0 type bridge vlan_filtering 1
> > ip link set dev br0 up
> >
> > ip link set dev net0 up
> > ip link set dev net0 master br0
> >
> > ip link set dev net1 up
> > ip link set dev net1 master br0
> >
> > bridge vlan add dev net0 vid 100 master
> > bridge vlan add dev br0 vid 100 self
> > bridge vlan add dev net1 vid 100 master
> >
> > tc qdisc add dev net0 handle ffff: clsact
> > tc qdisc add dev net1 handle ffff: clsact
> >
> > tc filter add dev net0 ingress pref 1 protocol all flower \
> > action vlan push id 10 pipe action vlan push id 100
> >
> > tc filter add dev net0 egress pref 1 protocol 802.1q flower \
> > vlan_id 100 vlan_ethtype 802.1q cvlan_id 10 \
> > action vlan pop pipe action vlan pop
> >
> > Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
> > ---
> > net/sched/act_vlan.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
> > index 93fdaf707313..308d7d89f925 100644
> > --- a/net/sched/act_vlan.c
> > +++ b/net/sched/act_vlan.c
> > @@ -86,7 +86,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
> >
> > out:
> > if (skb_at_tc_ingress(skb))
> > - skb_pull_rcsum(skb, skb->mac_len);
> > + skb_pull_rcsum(skb, ETH_HLEN);
>
> As I said before, it would be safer to remember mac_len and use it later.
>
> __u16 mac_len = skb->mac_len;
> ...
> err = skb_vlan_push(...)
> ...
> if (skb_at_tc_ingress(skb))
> skb_pull_rcsum(skb, mac_len);
>
sorry, I misunderstood it. I will send an update.
Zahari
>
> --
> Toshiaki Makita
>
^ permalink raw reply
* Re: [PATCH net-next 7/9] net: bridge: Stop calling switchdev_port_attr_get()
From: Ido Schimmel @ 2019-02-14 11:20 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190213220638.1552-8-f.fainelli@gmail.com>
On Wed, Feb 13, 2019 at 02:06:36PM -0800, Florian Fainelli wrote:
> Now that all switchdev drivers have been converted to checking the
> bridge port flags during the prepare phase of the
> switchdev_port_attr_set() when they process
> SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, we can avoid calling
> switchdev_port_attr_get() with
> SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT.
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
> net/bridge/br_switchdev.c | 16 +++++++---------
> 1 file changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> index db9e8ab96d48..8f88f8a1a7fa 100644
> --- a/net/bridge/br_switchdev.c
> +++ b/net/bridge/br_switchdev.c
> @@ -64,29 +64,27 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
> {
> struct switchdev_attr attr = {
> .orig_dev = p->dev,
> - .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
> + .id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
> + .u.brport_flags = flags,
> };
> int err;
>
> if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD)
> return 0;
>
> - err = switchdev_port_attr_get(p->dev, &attr);
> - if (err == -EOPNOTSUPP)
> - return 0;
> - if (err)
> + err = switchdev_port_attr_set(p->dev, &attr);
> + if (err && err != -EOPNOTSUPP)
> return err;
>
> - /* Check if specific bridge flag attribute offload is supported */
> - if (!(attr.u.brport_flags_support & mask)) {
> + if (err == -EOPNOTSUPP) {
> br_warn(p->br, "bridge flag offload is not supported %u(%s)\n",
> (unsigned int)p->port_no, p->dev->name);
> - return -EOPNOTSUPP;
> + return err;
> }
I see that you return -EOPNOTSUPP from drivers in case of unsupported
flags. I believe this is problematic (I'll test soon). The same return
code is used by:
1. Switch drivers to indicate unsupported flags
2. switchdev code to indicate unsupported netdev (no switchdev ops)
I guess that with this patch any attempt to set bridge port flags on
veth/dummy device will result in an error.
>
> attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
> attr.flags = SWITCHDEV_F_DEFER;
> - attr.u.brport_flags = flags;
> +
> err = switchdev_port_attr_set(p->dev, &attr);
> if (err) {
> br_warn(p->br, "error setting offload flag on port %u(%s)\n",
> --
> 2.17.1
>
^ permalink raw reply
* Re: [PATCH net-next 8/9] net: Remove SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT
From: Ido Schimmel @ 2019-02-14 11:27 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190213220638.1552-9-f.fainelli@gmail.com>
On Wed, Feb 13, 2019 at 02:06:37PM -0800, Florian Fainelli wrote:
> Now that we have converted the bridge code and the drivers to check for
> bridge port(s) flags at the time we try to set them, there is no need
> for a get() -> set() sequence anymore and
> SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT therefore becomes unused.
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
^ permalink raw reply
* Re: [PATCH net-next 9/9] net: Get rid of switchdev_port_attr_get()
From: Ido Schimmel @ 2019-02-14 12:10 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190213220638.1552-10-f.fainelli@gmail.com>
On Wed, Feb 13, 2019 at 02:06:38PM -0800, Florian Fainelli wrote:
> With the bridge no longer calling switchdev_port_attr_get() to obtain
> the supported bridge port flags from a driver but instead trying to set
> the bridge port flags directly and relying on driver to reject
> unsupported configurations, we can effectively get rid of
> switchdev_port_attr_get() entirely since this was the only place where
> it was called.
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
> Documentation/networking/switchdev.txt | 5 ++---
> drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 7 -------
> drivers/net/ethernet/rocker/rocker_main.c | 7 -------
> drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 7 -------
> include/net/switchdev.h | 8 --------
> net/dsa/slave.c | 7 -------
> 6 files changed, 2 insertions(+), 39 deletions(-)
>
> diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
> index ea90243340a9..327afe754230 100644
> --- a/Documentation/networking/switchdev.txt
> +++ b/Documentation/networking/switchdev.txt
> @@ -233,9 +233,8 @@ the bridge's FDB. It's possible, but not optimal, to enable learning on the
> device port and on the bridge port, and disable learning_sync.
>
> To support learning and learning_sync port attributes, the driver implements
> -switchdev op switchdev_port_attr_get/set for
> -SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS. The driver should initialize the attributes
> -to the hardware defaults.
> +switchdev op switchdev_port_attr_set for SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS.
> +The driver should initialize the attributes to the hardware defaults.
Last sentence is not relevant anymore. learning_sync can also be dropped
^ permalink raw reply
* [rdma-rc PATCH 0/2] iw_cxgb4: Adjust the cq/qp mask
From: Raju Rangoju @ 2019-02-14 12:10 UTC (permalink / raw)
To: jgg, davem, linux-rdma; +Cc: netdev, swise, rajur
Export the LLD sge_host_page_size field to ULDs via
cxgb4_lld_info, so that iw_cxgb4 can adjust the cq/qp
mask based on the number of bar2 pages in a host page.
This series has both net(cxgb4) and rdma(iw_cxgb4) changes,
and I would request this merge via rdma repo.
I have made sure this series applies cleanly on both net-next
and rdma-for-rc and doesn't cause any merge conflicts.
Raju Rangoju (2):
cxgb4: export sge_host_page_size to ulds
iw_cxgb4: cq/qp mask depends on bar2 pages in a host page
drivers/infiniband/hw/cxgb4/device.c | 15 +++++++++++++--
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1 +
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 +
3 files changed, 15 insertions(+), 2 deletions(-)
--
2.13.0
^ permalink raw reply
* [rdma-rc PATCH 1/2] cxgb4: export sge_host_page_size to ulds
From: Raju Rangoju @ 2019-02-14 12:10 UTC (permalink / raw)
To: jgg, davem, linux-rdma; +Cc: netdev, swise, rajur
In-Reply-To: <20190214121054.11693-1-rajur@chelsio.com>
Export the sge_host_page_size field to ULDs via
cxgb4_lld_info, so that iw_cxgb4 can make use of
this in calculating the correct qp/cq mask.
Fixes: 2391b0030e ("cxgb4: Remove SGE_HOST_PAGE_SIZE dependency on page
size")
Signed-off-by: Raju Rangoju <rajur@chelsio.com>
---
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1 +
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index c041f44324db..b3654598a2d5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -660,6 +660,7 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
lld->cclk_ps = 1000000000 / adap->params.vpd.cclk;
lld->udb_density = 1 << adap->params.sge.eq_qpp;
lld->ucq_density = 1 << adap->params.sge.iq_qpp;
+ lld->sge_host_page_size = 1 << (adap->params.sge.hps + 10);
lld->filt_mode = adap->params.tp.vlan_pri_map;
/* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
for (i = 0; i < NCHAN; i++)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 5fa9a2d5fc4b..21da34a4ca24 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -336,6 +336,7 @@ struct cxgb4_lld_info {
unsigned int cclk_ps; /* Core clock period in psec */
unsigned short udb_density; /* # of user DB/page */
unsigned short ucq_density; /* # of user CQs/page */
+ unsigned int sge_host_page_size; /* SGE host page size */
unsigned short filt_mode; /* filter optional components */
unsigned short tx_modq[NCHAN]; /* maps each tx channel to a */
/* scheduler queue */
--
2.13.0
^ permalink raw reply related
* [rdma-rc PATCH 2/2] iw_cxgb4: cq/qp mask depends on bar2 pages in a host page
From: Raju Rangoju @ 2019-02-14 12:10 UTC (permalink / raw)
To: jgg, davem, linux-rdma; +Cc: netdev, swise, rajur
In-Reply-To: <20190214121054.11693-1-rajur@chelsio.com>
Adjust the cq/qp mask based on the number of bar2 pages in a host page.
For user-mode rdma, the granularity of the BAR2 memory mapped
to a user rdma process during queue allocation must be based
on the host page size. The lld attributes udb_density and
ucq_density are used to figure out how many sge contexts are
in a bar2 page. So the rdev->qpmask and rdev->cqmask in
iw_cxgb4 need to now be adjusted based on how many sge bar2
pages are in a host page.
Fixes: 2391b0030e ("cxgb4: Remove SGE_HOST_PAGE_SIZE dependency on page size")
Signed-off-by: Raju Rangoju <rajur@chelsio.com>
---
drivers/infiniband/hw/cxgb4/device.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index c13c0ba30f63..d499cd61c0e8 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -783,6 +783,7 @@ void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
static int c4iw_rdev_open(struct c4iw_rdev *rdev)
{
int err;
+ unsigned int factor;
c4iw_init_dev_ucontext(rdev, &rdev->uctx);
@@ -806,8 +807,18 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
return -EINVAL;
}
- rdev->qpmask = rdev->lldi.udb_density - 1;
- rdev->cqmask = rdev->lldi.ucq_density - 1;
+ /* This implementation requires a sge_host_page_size <= PAGE_SIZE. */
+ if (rdev->lldi.sge_host_page_size > PAGE_SIZE) {
+ pr_err("%s: unsupported sge host page size %u\n",
+ pci_name(rdev->lldi.pdev),
+ rdev->lldi.sge_host_page_size);
+ return -EINVAL;
+ }
+
+ factor = PAGE_SIZE / rdev->lldi.sge_host_page_size;
+ rdev->qpmask = (rdev->lldi.udb_density * factor) - 1;
+ rdev->cqmask = (rdev->lldi.ucq_density * factor) - 1;
+
pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
--
2.13.0
^ permalink raw reply related
* Re: [PATCH 5/9] perf, bpf: save bpf_prog_info in a rbtree in perf_env
From: Jiri Olsa @ 2019-02-14 12:26 UTC (permalink / raw)
To: Song Liu; +Cc: netdev, linux-kernel, ast, daniel, kernel-team, peterz, acme
In-Reply-To: <20190209011705.2160185-6-songliubraving@fb.com>
On Fri, Feb 08, 2019 at 05:17:01PM -0800, Song Liu wrote:
SNIP
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index d01b8355f4ca..5894a177b7cf 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -3,7 +3,10 @@
> #define __PERF_ENV_H
>
> #include <linux/types.h>
> +#include <linux/rbtree.h>
> #include "cpumap.h"
> +#include "rwsem.h"
> +#include "bpf-event.h"
>
> struct cpu_topology_map {
> int socket_id;
> @@ -64,6 +67,8 @@ struct perf_env {
> struct memory_node *memory_nodes;
> unsigned long long memory_bsize;
> u64 clockid_res_ns;
> + struct rw_semaphore bpf_info_lock;
why's the lock needed?
jirka
^ permalink raw reply
* Re: [PATCH 5/9] perf, bpf: save bpf_prog_info in a rbtree in perf_env
From: Jiri Olsa @ 2019-02-14 12:33 UTC (permalink / raw)
To: Song Liu; +Cc: netdev, linux-kernel, ast, daniel, kernel-team, peterz, acme
In-Reply-To: <20190209011705.2160185-6-songliubraving@fb.com>
On Fri, Feb 08, 2019 at 05:17:01PM -0800, Song Liu wrote:
> bpf_prog_info contains information necessary to annotate bpf programs.
> This patch saves bpf_prog_info for bpf programs loaded in the system.
>
> perf-record saves bpf_prog_info information as headers to perf.data.
> A new header type HEADER_BPF_PROG_INFO is introduced for this data.
please move those 2 changes into separate patches then
it's hard to make comments when I don't see the rest of
the patches on the list; please resend the patchset
thanks,
jirka
^ permalink raw reply
* Re: [PATCH] net: phy: at803x: disable delay only for RGMII mode
From: Niklas Cassel @ 2019-02-14 12:39 UTC (permalink / raw)
To: Peter Ujfalusi
Cc: Marc Gonzalez, Andrew Lunn, Florian Fainelli, Vinod Koul,
David S Miller, linux-arm-msm, Bjorn Andersson, netdev,
Nori, Sekhar
In-Reply-To: <3356ff05-8d08-591e-03bf-9d846f79097b@ti.com>
On Thu, Feb 14, 2019 at 12:49:36PM +0200, Peter Ujfalusi wrote:
> Hi Niklas,
>
> On 13/02/2019 19.40, Niklas Cassel wrote:
> > On Wed, Feb 13, 2019 at 02:40:18PM +0100, Marc Gonzalez wrote:
> >> On 13/02/2019 14:29, Andrew Lunn wrote:
> >>
> >>>> So we have these modes:
> >>>>
> >>>> PHY_INTERFACE_MODE_RGMII: TX and RX delays disabled
> >>>> PHY_INTERFACE_MODE_RGMII_ID: TX and RX delays enabled
> >>>> PHY_INTERFACE_MODE_RGMII_RXID: RX delay enabled, TX delay disabled
> >>>> PHY_INTERFACE_MODE_RGMII_TXID: TX delay enabled, RX delay disabled
> >>>>
> >>>> What I don't like with this patch, is that if we specify phy-mode
> >>>> PHY_INTERFACE_MODE_RGMII_TXID, this patch will enable TX delay,
> >>>> but RX delay will not be explicitly set.
> >>>
> >>> That is not the behaviour we want. It is best to assume the device is
> >>> in a random state, and correctly enable/disable all delays as
> >>> requested. Only leave the hardware alone if PHY_INTERFACE_MODE_NA is
> >>> used.
> >>
> >> That's what my patch did:
> >> https://www.spinics.net/lists/netdev/msg445053.html
> >>
> >> But see Florian's remarks:
> >> https://www.spinics.net/lists/netdev/msg445133.html
> >
> > Hello Marc,
> >
> > I saw that comment from Florian. However that was way back in 2017.
> > Maybe the phy-modes were not as well defined back then?
> >
> > Andrew recently suggested to fix the driver so that it conforms with the
> > phy-modes, and fix any SoC that specified an incorrect phy-mode in DT
> > and thus relied upon the broken behavior of the PHY driver:
> > https://www.spinics.net/lists/netdev/msg445133.html
> >
> >
> > So, I've rebased your old patch, see attachment.
> > I suggest that Peter test it on am335x-evm.
>
> with the patch + s/rgmii-txid/rgmii-id in the am335x-evmsk.dts ethernet
> is working.
> I don't have am335x-evm to test, but it has the same PHY as evmsk.
>
Florian's concern was that this PHY driver looked at "phy-mode" from the
perspective of the MAC rather than the PHY.
However, if s/rgmii-txid/rgmii-id is the correct fix for am335x-evm,
then this means that this PHY driver was just broken.
If the driver had misinterpreted the perspective, then the correct
fix for am335x-evm would have been s/rgmii-txid/rgmii-rxid.
So considering that this driver seems to be really broken
(rather than just inverted perspective),
perhaps we can merge the patch I attached in my previous email after all?
(Together with a s/rgmii-txid/rgmii-id in the am335x-evmsk.dts.)
Kind regards,
Niklas
> > am335x-evm appears to rely on the current broken behavior of the PHY
> > driver, so we will probably need to fix the am335x-evm according to this:
> > https://www.spinics.net/lists/netdev/msg445117.html
> > and merge that as well.
> >
> >
> > Andrew, Florian, do you both agree?
> >
> >
> > Kind regards,
> > Niklas
> >
>
> - Péter
>
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
> Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
^ permalink raw reply
* Re: TC stats / hw offload question
From: Jamal Hadi Salim @ 2019-02-14 12:39 UTC (permalink / raw)
To: Edward Cree, netdev
Cc: Jiri Pirko, Cong Wang, Or Gerlitz, Andy Gospodarek, PJ Waskiewicz,
Anjali Singhai Jain, Jakub Kicinski
In-Reply-To: <11ab2dc0-ec39-18cd-d170-0d5f954198b9@solarflare.com>
On 2019-02-11 6:44 a.m., Edward Cree wrote:
>> Hasnt been necessary thus far.
>> Is your end goal to match and count?
> My end goal is to implement TC offload in some hw we're designing
> here at Solarflare. So I'm trying to determine what hardware is
> expected/required to do.
> It might be possible to design our new hw so that we can attach a
> counter to every action, if that's what TC wants.
It makes sense to have a counter on every action - even if it is
for debugging purposes. The two most basic actions are "drop" or
"accept". In TC speak the default action is "classid x:y" which
typically is to select a queue or give the flow some identity
(one should be able to use the same action on h/w ingress as well
to select a rx DMA ring for example, but that seems uncommon).
Note, your counters should also be shareable; example, count all
the drops in one counter across multiple flows as in the following
case where counter index 1 is used.
tc flower match foo action drop index 1
tc flower match bar action drop index 1
> But since the
> other vendors don't seem to do that, I wondered if there was a
> reason, or if perhaps the counter resources (and PCI bw to read
> them) could be saved if all those separate counters aren't really
> needed.
Probably nobody has paid attention or asked as you did.
Will let the h/w folks speak for themselves. My understanding
based on experience is counters are cheap. Most modern NICs
and ASICs have a gazillion of them at their disposal.
> Right now the design we are considering would only count
> packets as-matched, i.e. before any edits. That's fine for encap
> — you can calculate the bytes correction in SW — but not for decap
> since in principle the length of the RXed outer headers could
> vary (e.g. you might have IP options there).
>
ok, so not much in terms of other types of actions.
But for abstraction sake maybe use "flowid x:y" and have counters
associated with that. Or even make this optional and only attach
a counter if someone says "action ok" and allow them to specify
the counter index (assuming your architecture has an indexed table
of counters).
cheers,
jamal
^ permalink raw reply
* [PATCH net-next v2 0/2] cxgb4/cxgb4vf: Support for SGE doorbell queue timer
From: Vishal Kulkarni @ 2019-02-14 12:49 UTC (permalink / raw)
To: netdev, davem; +Cc: nirranjan, indranil, dt, Vishal Kulkarni
This series of patches adds an SGE doorbell queue timer for faster DMA completions.
Patch 1 Implements SGE doorbell queue timer
Patch 2 Adds ethtool capability to set/get SGE doorbell queue timer tick
---
v2
- Reverse christmas tree formatting for local variables.
---
Vishal Kulkarni (2):
cxgb4/cxgb4vf: Add support for SGE doorbell queue timer
cxgb4: Add capability to get/set SGE Doorbell Queue Timer Tick
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 11 +-
drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 189 +++++++++++-
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 29 +-
drivers/net/ethernet/chelsio/cxgb4/sge.c | 322 +++++++++++++++++----
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 41 +++
drivers/net/ethernet/chelsio/cxgb4/t4_hw.h | 1 +
drivers/net/ethernet/chelsio/cxgb4/t4_values.h | 6 +
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 24 +-
drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 27 +-
9 files changed, 571 insertions(+), 79 deletions(-)
--
1.8.3.1
^ permalink raw reply
* [PATCH net-next v2 1/2] cxgb4/cxgb4vf: Add support for SGE doorbell queue timer
From: Vishal Kulkarni @ 2019-02-14 12:49 UTC (permalink / raw)
To: netdev, davem; +Cc: nirranjan, indranil, dt, Vishal Kulkarni
In-Reply-To: <1550148556-3531-1-git-send-email-vishal@chelsio.com>
T6 introduced a Timer Mechanism in SGE called the
SGE Doorbell Queue Timer. With this we can now configure
TX Queues to get CIDX Updates when:
Time(CIDX == PIDX) >= Timer
Previously we relied on TX Queue Status Page updates by hardware
for DMA completions. This will make Hardware/Firmware actually
deliver the CIDX Updates as Ingress Queue messages with
commensurate Interrupts.
So we now have a new RX Path component for processing CIDX Updates
and reclaiming TX Descriptors faster.
Original work by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Vishal Kulkarni <vishal@chelsio.com>
---
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 10 +-
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 19 +-
drivers/net/ethernet/chelsio/cxgb4/sge.c | 322 +++++++++++++++++++-----
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 41 +++
drivers/net/ethernet/chelsio/cxgb4/t4_hw.h | 1 +
drivers/net/ethernet/chelsio/cxgb4/t4_values.h | 6 +
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 24 +-
drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 27 +-
8 files changed, 375 insertions(+), 75 deletions(-)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 568715a..68d0d45 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -617,6 +617,7 @@ enum { /* adapter flags */
FW_OFLD_CONN = (1 << 9),
ROOT_NO_RELAXED_ORDERING = (1 << 10),
SHUTTING_DOWN = (1 << 11),
+ SGE_DBQ_TIMER = (1 << 12),
};
enum {
@@ -756,6 +757,8 @@ struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */
#ifdef CONFIG_CHELSIO_T4_DCB
u8 dcb_prio; /* DCB Priority bound to queue */
#endif
+ u8 dbqt; /* SGE Doorbell Queue Timer in use */
+ unsigned int dbqtimerix; /* SGE Doorbell Queue Timer Index */
unsigned long tso; /* # of TSO requests */
unsigned long tx_cso; /* # of Tx checksum offloads */
unsigned long vlan_ins; /* # of Tx VLAN insertions */
@@ -816,6 +819,7 @@ struct sge {
u16 nqs_per_uld; /* # of Rx queues per ULD */
u16 timer_val[SGE_NTIMERS];
u8 counter_val[SGE_NCOUNTERS];
+ u16 dbqtimer_val[SGE_NDBQTIMERS];
u32 fl_pg_order; /* large page allocation size */
u32 stat_len; /* length of status page at ring end */
u32 pktshift; /* padding between CPL & packet data */
@@ -1402,7 +1406,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
rspq_flush_handler_t flush_handler, int cong);
int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
struct net_device *dev, struct netdev_queue *netdevq,
- unsigned int iqid);
+ unsigned int iqid, u8 dbqt);
int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
struct net_device *dev, unsigned int iqid,
unsigned int cmplqid);
@@ -1415,6 +1419,8 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
int t4_sge_init(struct adapter *adap);
void t4_sge_start(struct adapter *adap);
void t4_sge_stop(struct adapter *adap);
+int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *q,
+ int maxreclaim);
void cxgb4_set_ethtool_ops(struct net_device *netdev);
int cxgb4_write_rss(const struct port_info *pi, const u16 *queues);
enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb);
@@ -1821,6 +1827,8 @@ int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
unsigned int vf, unsigned int eqid);
int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type);
+int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers,
+ u16 *dbqtimers);
void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl);
int t4_update_port_info(struct port_info *pi);
int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index adf75d1..bdd11a6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -575,7 +575,7 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
struct sge_eth_txq *eq;
eq = container_of(txq, struct sge_eth_txq, q);
- netif_tx_wake_queue(eq->txq);
+ t4_sge_eth_txq_egress_update(q->adap, eq, -1);
} else {
struct sge_uld_txq *oq;
@@ -933,10 +933,13 @@ static int setup_sge_queues(struct adapter *adap)
q->rspq.idx = j;
memset(&q->stats, 0, sizeof(q->stats));
}
- for (j = 0; j < pi->nqsets; j++, t++) {
+
+ q = &s->ethrxq[pi->first_qset];
+ for (j = 0; j < pi->nqsets; j++, t++, q++) {
err = t4_sge_alloc_eth_txq(adap, t, dev,
netdev_get_tx_queue(dev, j),
- s->fw_evtq.cntxt_id);
+ q->rspq.cntxt_id,
+ !!(adap->flags & SGE_DBQ_TIMER));
if (err)
goto freeout;
}
@@ -958,7 +961,7 @@ static int setup_sge_queues(struct adapter *adap)
if (!is_t4(adap->params.chip)) {
err = t4_sge_alloc_eth_txq(adap, &s->ptptxq, adap->port[0],
netdev_get_tx_queue(adap->port[0], 0)
- , s->fw_evtq.cntxt_id);
+ , s->fw_evtq.cntxt_id, false);
if (err)
goto freeout;
}
@@ -4325,6 +4328,14 @@ static int adap_init0(struct adapter *adap)
if (ret < 0)
goto bye;
+ /* Grab the SGE Doorbell Queue Timer values. If successful, that
+ * indicates that the Firmware and Hardware support this.
+ */
+ ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(adap->sge.dbqtimer_val),
+ adap->sge.dbqtimer_val);
+ if (!ret)
+ adap->flags |= SGE_DBQ_TIMER;
+
if (is_bypass_device(adap->pdev->device))
adap->params.bypass = 1;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index fc0bc64..f18493f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -80,9 +80,10 @@
* Max number of Tx descriptors we clean up at a time. Should be modest as
* freeing skbs isn't cheap and it happens while holding locks. We just need
* to free packets faster than they arrive, we eventually catch up and keep
- * the amortized cost reasonable. Must be >= 2 * TXQ_STOP_THRES.
+ * the amortized cost reasonable. Must be >= 2 * TXQ_STOP_THRES. It should
+ * also match the CIDX Flush Threshold.
*/
-#define MAX_TX_RECLAIM 16
+#define MAX_TX_RECLAIM 32
/*
* Max number of Rx buffers we replenish at a time. Again keep this modest,
@@ -401,31 +402,52 @@ static inline int reclaimable(const struct sge_txq *q)
}
/**
- * cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
+ * reclaim_completed_tx - reclaims completed TX Descriptors
* @adap: the adapter
* @q: the Tx queue to reclaim completed descriptors from
+ * @maxreclaim: the maximum number of TX Descriptors to reclaim or -1
* @unmap: whether the buffers should be unmapped for DMA
*
- * Reclaims Tx descriptors that the SGE has indicated it has processed,
- * and frees the associated buffers if possible. Called with the Tx
- * queue locked.
+ * Reclaims Tx Descriptors that the SGE has indicated it has processed,
+ * and frees the associated buffers if possible. If @maxreclaim == -1,
+ * then we'll use a default maximum. Called with the TX Queue locked.
*/
-inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
- bool unmap)
+static inline int reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+ int maxreclaim, bool unmap)
{
- int avail = reclaimable(q);
+ int reclaim = reclaimable(q);
- if (avail) {
+ if (reclaim) {
/*
* Limit the amount of clean up work we do at a time to keep
* the Tx lock hold time O(1).
*/
- if (avail > MAX_TX_RECLAIM)
- avail = MAX_TX_RECLAIM;
+ if (maxreclaim < 0)
+ maxreclaim = MAX_TX_RECLAIM;
+ if (reclaim > maxreclaim)
+ reclaim = maxreclaim;
- free_tx_desc(adap, q, avail, unmap);
- q->in_use -= avail;
+ free_tx_desc(adap, q, reclaim, unmap);
+ q->in_use -= reclaim;
}
+
+ return reclaim;
+}
+
+/**
+ * cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
+ * @adap: the adapter
+ * @q: the Tx queue to reclaim completed descriptors from
+ * @unmap: whether the buffers should be unmapped for DMA
+ *
+ * Reclaims Tx descriptors that the SGE has indicated it has processed,
+ * and frees the associated buffers if possible. Called with the Tx
+ * queue locked.
+ */
+void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+ bool unmap)
+{
+ (void)reclaim_completed_tx(adap, q, -1, unmap);
}
EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
@@ -1288,6 +1310,44 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
}
/**
+ * t4_sge_eth_txq_egress_update - handle Ethernet TX Queue update
+ * @adap: the adapter
+ * @eq: the Ethernet TX Queue
+ * @maxreclaim: the maximum number of TX Descriptors to reclaim or -1
+ *
+ * We're typically called here to update the state of an Ethernet TX
+ * Queue with respect to the hardware's progress in consuming the TX
+ * Work Requests that we've put on that Egress Queue. This happens
+ * when we get Egress Queue Update messages and also prophylactically
+ * in regular timer-based Ethernet TX Queue maintenance.
+ */
+int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
+ int maxreclaim)
+{
+ struct sge_txq *q = &eq->q;
+ unsigned int reclaimed;
+
+ if (!q->in_use || !__netif_tx_trylock(eq->txq))
+ return 0;
+
+ /* Reclaim pending completed TX Descriptors. */
+ reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true);
+
+ /* If the TX Queue is currently stopped and there's now more than half
+ * the queue available, restart it. Otherwise bail out since the rest
+ * of what we want to do here is with the possibility of shipping any
+ * currently buffered Coalesced TX Work Request.
+ */
+ if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) {
+ netif_tx_wake_queue(eq->txq);
+ eq->q.restarts++;
+ }
+
+ __netif_tx_unlock(eq->txq);
+ return reclaimed;
+}
+
+/**
* cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
* @skb: the packet
* @dev: the egress net device
@@ -1357,7 +1417,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
}
skb_tx_timestamp(skb);
- cxgb4_reclaim_completed_tx(adap, &q->q, true);
+ reclaim_completed_tx(adap, &q->q, -1, true);
cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
#ifdef CONFIG_CHELSIO_T4_FCOE
@@ -1400,8 +1460,25 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+ /* After we're done injecting the Work Request for this
+ * packet, we'll be below our "stop threshold" so stop the TX
+ * Queue now and schedule a request for an SGE Egress Queue
+ * Update message. The queue will get started later on when
+ * the firmware processes this Work Request and sends us an
+ * Egress Queue Status Update message indicating that space
+ * has opened up.
+ */
eth_txq_stop(q);
- wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+
+ /* If we're using the SGE Doorbell Queue Timer facility, we
+ * don't need to ask the Firmware to send us Egress Queue CIDX
+ * Updates: the Hardware will do this automatically. And
+ * since we send the Ingress Queue CIDX Updates to the
+ * corresponding Ethernet Response Queue, we'll get them very
+ * quickly.
+ */
+ if (!q->dbqt)
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
}
wr = (void *)&q->q.desc[q->q.pidx];
@@ -1671,7 +1748,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
/* Take this opportunity to reclaim any TX Descriptors whose DMA
* transfers have completed.
*/
- cxgb4_reclaim_completed_tx(adapter, &txq->q, true);
+ reclaim_completed_tx(adapter, &txq->q, -1, true);
/* Calculate the number of flits and TX Descriptors we're going to
* need along with how many TX Descriptors will be left over after
@@ -1715,7 +1792,16 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
* has opened up.
*/
eth_txq_stop(txq);
- wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+
+ /* If we're using the SGE Doorbell Queue Timer facility, we
+ * don't need to ask the Firmware to send us Egress Queue CIDX
+ * Updates: the Hardware will do this automatically. And
+ * since we send the Ingress Queue CIDX Updates to the
+ * corresponding Ethernet Response Queue, we'll get them very
+ * quickly.
+ */
+ if (!txq->dbqt)
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
}
/* Start filling in our Work Request. Note that we do _not_ handle
@@ -2794,6 +2880,74 @@ static int t4_tx_hststamp(struct adapter *adapter, struct sk_buff *skb,
}
/**
+ * t4_tx_completion_handler - handle CPL_SGE_EGR_UPDATE messages
+ * @rspq: Ethernet RX Response Queue associated with Ethernet TX Queue
+ * @rsp: Response Entry pointer into Response Queue
+ * @gl: Gather List pointer
+ *
+ * For adapters which support the SGE Doorbell Queue Timer facility,
+ * we configure the Ethernet TX Queues to send CIDX Updates to the
+ * Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE
+ * messages. This adds a small load to PCIe Link RX bandwidth and,
+ * potentially, higher CPU Interrupt load, but allows us to respond
+ * much more quickly to the CIDX Updates. This is important for
+ * Upper Layer Software which isn't willing to have a large amount
+ * of TX Data outstanding before receiving DMA Completions.
+ */
+static void t4_tx_completion_handler(struct sge_rspq *rspq,
+ const __be64 *rsp,
+ const struct pkt_gl *gl)
+{
+ u8 opcode = ((const struct rss_header *)rsp)->opcode;
+ struct port_info *pi = netdev_priv(rspq->netdev);
+ struct adapter *adapter = rspq->adap;
+ struct sge *s = &adapter->sge;
+ struct sge_eth_txq *txq;
+
+ /* skip RSS header */
+ rsp++;
+
+ /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
+ */
+ if (unlikely(opcode == CPL_FW4_MSG &&
+ ((const struct cpl_fw4_msg *)rsp)->type ==
+ FW_TYPE_RSSCPL)) {
+ rsp++;
+ opcode = ((const struct rss_header *)rsp)->opcode;
+ rsp++;
+ }
+
+ if (unlikely(opcode != CPL_SGE_EGR_UPDATE)) {
+ pr_info("%s: unexpected FW4/CPL %#x on Rx queue\n",
+ __func__, opcode);
+ return;
+ }
+
+ txq = &s->ethtxq[pi->first_qset + rspq->idx];
+
+ /* We've got the Hardware Consumer Index Update in the Egress Update
+ * message. If we're using the SGE Doorbell Queue Timer mechanism,
+ * these Egress Update messages will be our sole CIDX Updates we get
+ * since we don't want to chew up PCIe bandwidth for both Ingress
+ * Messages and Status Page writes. However, the code which manages
+ * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value
+ * stored in the Status Page at the end of the TX Queue. It's easiest
+ * to simply copy the CIDX Update value from the Egress Update message
+ * to the Status Page. Also note that no Endian issues need to be
+ * considered here since both are Big Endian and we're just copying
+ * bytes consistently ...
+ */
+ if (txq->dbqt) {
+ struct cpl_sge_egr_update *egr;
+
+ egr = (struct cpl_sge_egr_update *)rsp;
+ WRITE_ONCE(txq->q.stat->cidx, egr->cidx);
+ }
+
+ t4_sge_eth_txq_egress_update(adapter, txq, -1);
+}
+
+/**
* t4_ethrx_handler - process an ingress ethernet packet
* @q: the response queue that received the packet
* @rsp: the response queue descriptor holding the RX_PKT message
@@ -2816,6 +2970,15 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
struct port_info *pi;
int ret = 0;
+ /* If we're looking at TX Queue CIDX Update, handle that separately
+ * and return.
+ */
+ if (unlikely((*(u8 *)rsp == CPL_FW4_MSG) ||
+ (*(u8 *)rsp == CPL_SGE_EGR_UPDATE))) {
+ t4_tx_completion_handler(q, rsp, si);
+ return 0;
+ }
+
if (unlikely(*(u8 *)rsp == cpl_trace_pkt))
return handle_trace_pkt(q->adap, si);
@@ -3289,10 +3452,10 @@ static void sge_rx_timer_cb(struct timer_list *t)
static void sge_tx_timer_cb(struct timer_list *t)
{
- unsigned long m;
- unsigned int i, budget;
struct adapter *adap = from_timer(adap, t, sge.tx_timer);
struct sge *s = &adap->sge;
+ unsigned long m, period;
+ unsigned int i, budget;
for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
for (m = s->txq_maperr[i]; m; m &= m - 1) {
@@ -3320,29 +3483,29 @@ static void sge_tx_timer_cb(struct timer_list *t)
budget = MAX_TIMER_TX_RECLAIM;
i = s->ethtxq_rover;
do {
- struct sge_eth_txq *q = &s->ethtxq[i];
-
- if (q->q.in_use &&
- time_after_eq(jiffies, q->txq->trans_start + HZ / 100) &&
- __netif_tx_trylock(q->txq)) {
- int avail = reclaimable(&q->q);
-
- if (avail) {
- if (avail > budget)
- avail = budget;
-
- free_tx_desc(adap, &q->q, avail, true);
- q->q.in_use -= avail;
- budget -= avail;
- }
- __netif_tx_unlock(q->txq);
- }
+ budget -= t4_sge_eth_txq_egress_update(adap, &s->ethtxq[i],
+ budget);
+ if (!budget)
+ break;
if (++i >= s->ethqsets)
i = 0;
- } while (budget && i != s->ethtxq_rover);
+ } while (i != s->ethtxq_rover);
s->ethtxq_rover = i;
- mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
+
+ if (budget == 0) {
+ /* If we found too many reclaimable packets schedule a timer
+ * in the near future to continue where we left off.
+ */
+ period = 2;
+ } else {
+ /* We reclaimed all reclaimable TX Descriptors, so reschedule
+ * at the normal period.
+ */
+ period = TX_QCHECK_PERIOD;
+ }
+
+ mod_timer(&s->tx_timer, jiffies + period);
}
/**
@@ -3421,7 +3584,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
: FW_IQ_IQTYPE_OFLD));
if (fl) {
- enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
+ unsigned int chip_ver =
+ CHELSIO_CHIP_VERSION(adap->params.chip);
/* Allocate the ring for the hardware free list (with space
* for its status page) along with the associated software
@@ -3459,10 +3623,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
* the smaller 64-byte value there).
*/
c.fl0dcaen_to_fl0cidxfthresh =
- htons(FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ?
+ htons(FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5 ?
FETCHBURSTMIN_128B_X :
- FETCHBURSTMIN_64B_X) |
- FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ?
+ FETCHBURSTMIN_64B_T6_X) |
+ FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ?
FETCHBURSTMAX_512B_X :
FETCHBURSTMAX_256B_X));
c.fl0size = htons(flsz);
@@ -3584,14 +3748,24 @@ static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
adap->sge.egr_map[id - adap->sge.egr_start] = q;
}
+/**
+ * t4_sge_alloc_eth_txq - allocate an Ethernet TX Queue
+ * @adap: the adapter
+ * @txq: the SGE Ethernet TX Queue to initialize
+ * @dev: the Linux Network Device
+ * @netdevq: the corresponding Linux TX Queue
+ * @iqid: the Ingress Queue to which to deliver CIDX Update messages
+ * @dbqt: whether this TX Queue will use the SGE Doorbell Queue Timers
+ */
int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
struct net_device *dev, struct netdev_queue *netdevq,
- unsigned int iqid)
+ unsigned int iqid, u8 dbqt)
{
- int ret, nentries;
- struct fw_eq_eth_cmd c;
- struct sge *s = &adap->sge;
+ unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
struct port_info *pi = netdev_priv(dev);
+ struct sge *s = &adap->sge;
+ struct fw_eq_eth_cmd c;
+ int ret, nentries;
/* Add status entries */
nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
@@ -3610,19 +3784,47 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
FW_EQ_ETH_CMD_VFN_V(0));
c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F |
FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c));
- c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
- FW_EQ_ETH_CMD_VIID_V(pi->viid));
+
+ /* For TX Ethernet Queues using the SGE Doorbell Queue Timer
+ * mechanism, we use Ingress Queue messages for Hardware Consumer
+ * Index Updates on the TX Queue. Otherwise we have the Hardware
+ * write the CIDX Updates into the Status Page at the end of the
+ * TX Queue.
+ */
+ c.autoequiqe_to_viid = htonl((dbqt
+ ? FW_EQ_ETH_CMD_AUTOEQUIQE_F
+ : FW_EQ_ETH_CMD_AUTOEQUEQE_F) |
+ FW_EQ_ETH_CMD_VIID_V(pi->viid));
+
c.fetchszm_to_iqid =
- htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
+ htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt
+ ? HOSTFCMODE_INGRESS_QUEUE_X
+ : HOSTFCMODE_STATUS_PAGE_X) |
FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
+
+ /* Note that the CIDX Flush Threshold should match MAX_TX_RECLAIM. */
c.dcaen_to_eqsize =
- htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
+ htonl(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+ ? FETCHBURSTMIN_64B_X
+ : FETCHBURSTMIN_64B_T6_X) |
FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
FW_EQ_ETH_CMD_EQSIZE_V(nentries));
+
c.eqaddr = cpu_to_be64(txq->q.phys_addr);
+ /* If we're using the SGE Doorbell Queue Timer mechanism, pass in the
+ * currently configured Timer Index. This can be changed later via an
+ * ethtool -C tx-usecs {Timer Val} command. Note that the SGE
+ * Doorbell Queue mode is currently automatically enabled in the
+ * Firmware by setting either AUTOEQUEQE or AUTOEQUIQE ...
+ */
+ if (dbqt)
+ c.timeren_timerix =
+ cpu_to_be32(FW_EQ_ETH_CMD_TIMEREN_F |
+ FW_EQ_ETH_CMD_TIMERIX_V(txq->dbqtimerix));
+
ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
if (ret) {
kfree(txq->q.sdesc);
@@ -3639,6 +3841,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
txq->txq = netdevq;
txq->tso = txq->tx_cso = txq->vlan_ins = 0;
txq->mapping_err = 0;
+ txq->dbqt = dbqt;
+
return 0;
}
@@ -3646,10 +3850,11 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
struct net_device *dev, unsigned int iqid,
unsigned int cmplqid)
{
- int ret, nentries;
- struct fw_eq_ctrl_cmd c;
- struct sge *s = &adap->sge;
+ unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
struct port_info *pi = netdev_priv(dev);
+ struct sge *s = &adap->sge;
+ struct fw_eq_ctrl_cmd c;
+ int ret, nentries;
/* Add status entries */
nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
@@ -3673,7 +3878,9 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) |
FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid));
c.dcaen_to_eqsize =
- htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
+ htonl(FW_EQ_CTRL_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+ ? FETCHBURSTMIN_64B_X
+ : FETCHBURSTMIN_64B_T6_X) |
FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
FW_EQ_CTRL_CMD_EQSIZE_V(nentries));
@@ -3713,6 +3920,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
struct net_device *dev, unsigned int iqid,
unsigned int uld_type)
{
+ unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
int ret, nentries;
struct fw_eq_ofld_cmd c;
struct sge *s = &adap->sge;
@@ -3743,7 +3951,9 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) |
FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid));
c.dcaen_to_eqsize =
- htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
+ htonl(FW_EQ_OFLD_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+ ? FETCHBURSTMIN_64B_X
+ : FETCHBURSTMIN_64B_T6_X) |
FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
FW_EQ_OFLD_CMD_EQSIZE_V(nentries));
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index c5e5466..27af347 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -6713,6 +6713,47 @@ int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox, int ctxt_type)
}
/**
+ * t4_read_sge_dbqtimers - read SGE Doorbell Queue Timer values
+ * @adap: the adapter
+ * @ndbqtimers: size of the provided SGE Doorbell Queue Timer table
+ * @dbqtimers: SGE Doorbell Queue Timer table
+ *
+ * Reads the SGE Doorbell Queue Timer values into the provided table.
+ * Returns 0 on success (Firmware and Hardware support this feature),
+ * an error on failure.
+ */
+int t4_read_sge_dbqtimers(struct adapter *adap, unsigned int ndbqtimers,
+ u16 *dbqtimers)
+{
+ int ret, dbqtimerix;
+
+ ret = 0;
+ dbqtimerix = 0;
+ while (dbqtimerix < ndbqtimers) {
+ int nparams, param;
+ u32 params[7], vals[7];
+
+ nparams = ndbqtimers - dbqtimerix;
+ if (nparams > ARRAY_SIZE(params))
+ nparams = ARRAY_SIZE(params);
+
+ for (param = 0; param < nparams; param++)
+ params[param] =
+ (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMER) |
+ FW_PARAMS_PARAM_Y_V(dbqtimerix + param));
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+ nparams, params, vals);
+ if (ret)
+ break;
+
+ for (param = 0; param < nparams; param++)
+ dbqtimers[dbqtimerix++] = vals[param];
+ }
+ return ret;
+}
+
+/**
* t4_fw_hello - establish communication with FW
* @adap: the adapter
* @mbox: mailbox to use for the FW command
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
index 361d503..002fc62 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
@@ -91,6 +91,7 @@ enum {
SGE_CTXT_SIZE = 24, /* size of SGE context */
SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */
SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */
+ SGE_NDBQTIMERS = 8, /* # of Doorbell Queue Timer values */
SGE_MAX_IQ_SIZE = 65520,
SGE_TIMER_RSTRT_CNTR = 6, /* restart RX packet threshold counter */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
index f6558cb..eb1aa82 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h
@@ -71,12 +71,18 @@
#define FETCHBURSTMIN_64B_X 2
#define FETCHBURSTMIN_128B_X 3
+/* T6 and later use a single-bit encoding for FetchBurstMin */
+#define FETCHBURSTMIN_64B_T6_X 0
+#define FETCHBURSTMIN_128B_T6_X 1
+
#define FETCHBURSTMAX_256B_X 2
#define FETCHBURSTMAX_512B_X 3
+#define HOSTFCMODE_INGRESS_QUEUE_X 1
#define HOSTFCMODE_STATUS_PAGE_X 2
#define CIDXFLUSHTHRESH_32_X 5
+#define CIDXFLUSHTHRESH_128_X 7
#define UPDATEDELIVERY_INTERRUPT_X 1
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 1d9b3e1..631f166 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1254,6 +1254,8 @@ enum fw_params_param_dev {
FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24,
FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27,
+ FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29,
+ FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A,
};
/*
@@ -1322,6 +1324,7 @@ enum fw_params_param_dmaq {
FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11,
FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12,
FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13,
+ FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX = 0x15,
FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20,
};
@@ -1751,8 +1754,8 @@ struct fw_eq_eth_cmd {
__be32 fetchszm_to_iqid;
__be32 dcaen_to_eqsize;
__be64 eqaddr;
- __be32 viid_pkd;
- __be32 r8_lo;
+ __be32 autoequiqe_to_viid;
+ __be32 timeren_timerix;
__be64 r9;
};
@@ -1847,6 +1850,10 @@ struct fw_eq_eth_cmd {
#define FW_EQ_ETH_CMD_EQSIZE_S 0
#define FW_EQ_ETH_CMD_EQSIZE_V(x) ((x) << FW_EQ_ETH_CMD_EQSIZE_S)
+#define FW_EQ_ETH_CMD_AUTOEQUIQE_S 31
+#define FW_EQ_ETH_CMD_AUTOEQUIQE_V(x) ((x) << FW_EQ_ETH_CMD_AUTOEQUIQE_S)
+#define FW_EQ_ETH_CMD_AUTOEQUIQE_F FW_EQ_ETH_CMD_AUTOEQUIQE_V(1U)
+
#define FW_EQ_ETH_CMD_AUTOEQUEQE_S 30
#define FW_EQ_ETH_CMD_AUTOEQUEQE_V(x) ((x) << FW_EQ_ETH_CMD_AUTOEQUEQE_S)
#define FW_EQ_ETH_CMD_AUTOEQUEQE_F FW_EQ_ETH_CMD_AUTOEQUEQE_V(1U)
@@ -1854,6 +1861,19 @@ struct fw_eq_eth_cmd {
#define FW_EQ_ETH_CMD_VIID_S 16
#define FW_EQ_ETH_CMD_VIID_V(x) ((x) << FW_EQ_ETH_CMD_VIID_S)
+#define FW_EQ_ETH_CMD_TIMEREN_S 3
+#define FW_EQ_ETH_CMD_TIMEREN_M 0x1
+#define FW_EQ_ETH_CMD_TIMEREN_V(x) ((x) << FW_EQ_ETH_CMD_TIMEREN_S)
+#define FW_EQ_ETH_CMD_TIMEREN_G(x) \
+ (((x) >> FW_EQ_ETH_CMD_TIMEREN_S) & FW_EQ_ETH_CMD_TIMEREN_M)
+#define FW_EQ_ETH_CMD_TIMEREN_F FW_EQ_ETH_CMD_TIMEREN_V(1U)
+
+#define FW_EQ_ETH_CMD_TIMERIX_S 0
+#define FW_EQ_ETH_CMD_TIMERIX_M 0x7
+#define FW_EQ_ETH_CMD_TIMERIX_V(x) ((x) << FW_EQ_ETH_CMD_TIMERIX_S)
+#define FW_EQ_ETH_CMD_TIMERIX_G(x) \
+ (((x) >> FW_EQ_ETH_CMD_TIMERIX_S) & FW_EQ_ETH_CMD_TIMERIX_M)
+
struct fw_eq_ctrl_cmd {
__be32 op_to_vfn;
__be32 alloc_to_len16;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 1d534f0..11d2ba0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -2268,7 +2268,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
cmd.iqaddr = cpu_to_be64(rspq->phys_addr);
if (fl) {
- enum chip_type chip =
+ unsigned int chip_ver =
CHELSIO_CHIP_VERSION(adapter->params.chip);
/*
* Allocate the ring for the hardware free list (with space
@@ -2319,10 +2319,10 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
*/
cmd.fl0dcaen_to_fl0cidxfthresh =
cpu_to_be16(
- FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ?
- FETCHBURSTMIN_128B_X :
- FETCHBURSTMIN_64B_X) |
- FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ?
+ FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5
+ ? FETCHBURSTMIN_128B_X
+ : FETCHBURSTMIN_64B_T6_X) |
+ FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ?
FETCHBURSTMAX_512B_X :
FETCHBURSTMAX_256B_X));
cmd.fl0size = cpu_to_be16(flsz);
@@ -2411,10 +2411,11 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq,
struct net_device *dev, struct netdev_queue *devq,
unsigned int iqid)
{
+ unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
+ struct port_info *pi = netdev_priv(dev);
+ struct fw_eq_eth_cmd cmd, rpl;
struct sge *s = &adapter->sge;
int ret, nentries;
- struct fw_eq_eth_cmd cmd, rpl;
- struct port_info *pi = netdev_priv(dev);
/*
* Calculate the size of the hardware TX Queue (including the Status
@@ -2448,17 +2449,19 @@ int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq,
cmd.alloc_to_len16 = cpu_to_be32(FW_EQ_ETH_CMD_ALLOC_F |
FW_EQ_ETH_CMD_EQSTART_F |
FW_LEN16(cmd));
- cmd.viid_pkd = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
- FW_EQ_ETH_CMD_VIID_V(pi->viid));
+ cmd.autoequiqe_to_viid = cpu_to_be32(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
+ FW_EQ_ETH_CMD_VIID_V(pi->viid));
cmd.fetchszm_to_iqid =
cpu_to_be32(FW_EQ_ETH_CMD_HOSTFCMODE_V(SGE_HOSTFCMODE_STPG) |
FW_EQ_ETH_CMD_PCIECHN_V(pi->port_id) |
FW_EQ_ETH_CMD_IQID_V(iqid));
cmd.dcaen_to_eqsize =
- cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(SGE_FETCHBURSTMIN_64B) |
- FW_EQ_ETH_CMD_FBMAX_V(SGE_FETCHBURSTMAX_512B) |
+ cpu_to_be32(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
+ ? FETCHBURSTMIN_64B_X
+ : FETCHBURSTMIN_64B_T6_X) |
+ FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
FW_EQ_ETH_CMD_CIDXFTHRESH_V(
- SGE_CIDXFLUSHTHRESH_32) |
+ CIDXFLUSHTHRESH_32_X) |
FW_EQ_ETH_CMD_EQSIZE_V(nentries));
cmd.eqaddr = cpu_to_be64(txq->q.phys_addr);
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next v2 2/2] cxgb4: Add capability to get/set SGE Doorbell Queue Timer Tick
From: Vishal Kulkarni @ 2019-02-14 12:49 UTC (permalink / raw)
To: netdev, davem; +Cc: nirranjan, indranil, dt, Vishal Kulkarni
In-Reply-To: <1550148556-3531-1-git-send-email-vishal@chelsio.com>
This patch adds support for getting and setting the SGE Doorbell Queue Timer Tick via ethtool.
Original work by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Vishal Kulkarni <vishal@chelsio.com>
---
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 +
drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 189 ++++++++++++++++++++-
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 14 +-
3 files changed, 198 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 68d0d45..b7b0eb1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -819,6 +819,7 @@ struct sge {
u16 nqs_per_uld; /* # of Rx queues per ULD */
u16 timer_val[SGE_NTIMERS];
u8 counter_val[SGE_NCOUNTERS];
+ u16 dbqtimer_tick;
u16 dbqtimer_val[SGE_NDBQTIMERS];
u32 fl_pg_order; /* large page allocation size */
u32 stat_len; /* length of status page at ring end */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 7960435..65b8dc7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -932,11 +932,190 @@ static int get_adaptive_rx_setting(struct net_device *dev)
return q->rspq.adaptive_rx;
}
-static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+/* Return the current global Adapter SGE Doorbell Queue Timer Tick for all
+ * Ethernet TX Queues.
+ */
+static int get_dbqtimer_tick(struct net_device *dev)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+
+ if (!(adap->flags & SGE_DBQ_TIMER))
+ return 0;
+
+ return adap->sge.dbqtimer_tick;
+}
+
+/* Return the SGE Doorbell Queue Timer Value for the Ethernet TX Queues
+ * associated with a Network Device.
+ */
+static int get_dbqtimer(struct net_device *dev)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+ struct sge_eth_txq *txq;
+
+ txq = &adap->sge.ethtxq[pi->first_qset];
+
+ if (!(adap->flags & SGE_DBQ_TIMER))
+ return 0;
+
+ /* all of the TX Queues use the same Timer Index */
+ return adap->sge.dbqtimer_val[txq->dbqtimerix];
+}
+
+/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX
+ * Queues. This is the fundamental "Tick" that sets the scale of values which
+ * can be used. Individual Ethernet TX Queues index into a relatively small
+ * array of Tick Multipliers. Changing the base Tick will thus change all of
+ * the resulting Timer Values associated with those multipliers for all
+ * Ethernet TX Queues.
+ */
+static int set_dbqtimer_tick(struct net_device *dev, int usecs)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+ struct sge *s = &adap->sge;
+ u32 param, val;
+ int ret;
+
+ if (!(adap->flags & SGE_DBQ_TIMER))
+ return 0;
+
+ /* return early if it's the same Timer Tick we're already using */
+ if (s->dbqtimer_tick == usecs)
+ return 0;
+
+ /* attempt to set the new Timer Tick value */
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK));
+ val = usecs;
+ ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
+ if (ret)
+ return ret;
+ s->dbqtimer_tick = usecs;
+
+ /* if successful, reread resulting dependent Timer values */
+ ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(s->dbqtimer_val),
+ s->dbqtimer_val);
+ return ret;
+}
+
+/* Set the SGE Doorbell Queue Timer Value for the Ethernet TX Queues
+ * associated with a Network Device. There is a relatively small array of
+ * possible Timer Values so we need to pick the closest value available.
+ */
+static int set_dbqtimer(struct net_device *dev, int usecs)
+{
+ int qix, timerix, min_timerix, delta, min_delta;
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+ struct sge *s = &adap->sge;
+ struct sge_eth_txq *txq;
+ u32 param, val;
+ int ret;
+
+ if (!(adap->flags & SGE_DBQ_TIMER))
+ return 0;
+
+ /* Find the SGE Doorbell Timer Value that's closest to the requested
+ * value.
+ */
+ min_delta = INT_MAX;
+ min_timerix = 0;
+ for (timerix = 0; timerix < ARRAY_SIZE(s->dbqtimer_val); timerix++) {
+ delta = s->dbqtimer_val[timerix] - usecs;
+ if (delta < 0)
+ delta = -delta;
+ if (delta < min_delta) {
+ min_delta = delta;
+ min_timerix = timerix;
+ }
+ }
+
+ /* Return early if it's the same Timer Index we're already using.
+ * We use the same Timer Index for all of the TX Queues for an
+ * interface so it's only necessary to check the first one.
+ */
+ txq = &s->ethtxq[pi->first_qset];
+ if (txq->dbqtimerix == min_timerix)
+ return 0;
+
+ for (qix = 0; qix < pi->nqsets; qix++, txq++) {
+ if (adap->flags & FULL_INIT_DONE) {
+ param =
+ (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX) |
+ FW_PARAMS_PARAM_YZ_V(txq->q.cntxt_id));
+ val = min_timerix;
+ ret = t4_set_params(adap, adap->mbox, adap->pf, 0,
+ 1, &param, &val);
+ if (ret)
+ return ret;
+ }
+ txq->dbqtimerix = min_timerix;
+ }
+ return 0;
+}
+
+/* Set the global Adapter SGE Doorbell Queue Timer Tick for all Ethernet TX
+ * Queues and the Timer Value for the Ethernet TX Queues associated with a
+ * Network Device. Since changing the global Tick changes all of the
+ * available Timer Values, we need to do this first before selecting the
+ * resulting closest Timer Value. Moreover, since the Tick is global,
+ * changing it affects the Timer Values for all Network Devices on the
+ * adapter. So, before changing the Tick, we grab all of the current Timer
+ * Values for other Network Devices on this Adapter and then attempt to select
+ * new Timer Values which are close to the old values ...
+ */
+static int set_dbqtimer_tickval(struct net_device *dev,
+ int tick_usecs, int timer_usecs)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+ int timer[MAX_NPORTS];
+ unsigned int port;
+ int ret;
+
+ /* Grab the other adapter Network Interface current timers and fill in
+ * the new one for this Network Interface.
+ */
+ for_each_port(adap, port)
+ if (port == pi->port_id)
+ timer[port] = timer_usecs;
+ else
+ timer[port] = get_dbqtimer(adap->port[port]);
+
+ /* Change the global Tick first ... */
+ ret = set_dbqtimer_tick(dev, tick_usecs);
+ if (ret)
+ return ret;
+
+ /* ... and then set all of the Network Interface Timer Values ... */
+ for_each_port(adap, port) {
+ ret = set_dbqtimer(adap->port[port], timer[port]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *coalesce)
{
- set_adaptive_rx_setting(dev, c->use_adaptive_rx_coalesce);
- return set_rx_intr_params(dev, c->rx_coalesce_usecs,
- c->rx_max_coalesced_frames);
+ int ret;
+
+ set_adaptive_rx_setting(dev, coalesce->use_adaptive_rx_coalesce);
+
+ ret = set_rx_intr_params(dev, coalesce->rx_coalesce_usecs,
+ coalesce->rx_max_coalesced_frames);
+ if (ret)
+ return ret;
+
+ return set_dbqtimer_tickval(dev,
+ coalesce->tx_coalesce_usecs_irq,
+ coalesce->tx_coalesce_usecs);
}
static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
@@ -949,6 +1128,8 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN_F) ?
adap->sge.counter_val[rq->pktcnt_idx] : 0;
c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev);
+ c->tx_coalesce_usecs_irq = get_dbqtimer_tick(dev);
+ c->tx_coalesce_usecs = get_dbqtimer(dev);
return 0;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index bdd11a6..bcbac24 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4331,8 +4331,18 @@ static int adap_init0(struct adapter *adap)
/* Grab the SGE Doorbell Queue Timer values. If successful, that
* indicates that the Firmware and Hardware support this.
*/
- ret = t4_read_sge_dbqtimers(adap, ARRAY_SIZE(adap->sge.dbqtimer_val),
- adap->sge.dbqtimer_val);
+ params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK));
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+ 1, params, val);
+
+ if (!ret) {
+ adap->sge.dbqtimer_tick = val[0];
+ ret = t4_read_sge_dbqtimers(adap,
+ ARRAY_SIZE(adap->sge.dbqtimer_val),
+ adap->sge.dbqtimer_val);
+ }
+
if (!ret)
adap->flags |= SGE_DBQ_TIMER;
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH net-next 7/9] net: bridge: Stop calling switchdev_port_attr_get()
From: Ido Schimmel @ 2019-02-14 13:02 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190214112002.GA8698@splinter>
On Thu, Feb 14, 2019 at 01:20:02PM +0200, Ido Schimmel wrote:
> On Wed, Feb 13, 2019 at 02:06:36PM -0800, Florian Fainelli wrote:
> > Now that all switchdev drivers have been converted to checking the
> > bridge port flags during the prepare phase of the
> > switchdev_port_attr_set() when they process
> > SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, we can avoid calling
> > switchdev_port_attr_get() with
> > SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT.
> >
> > Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> > ---
> > net/bridge/br_switchdev.c | 16 +++++++---------
> > 1 file changed, 7 insertions(+), 9 deletions(-)
> >
> > diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> > index db9e8ab96d48..8f88f8a1a7fa 100644
> > --- a/net/bridge/br_switchdev.c
> > +++ b/net/bridge/br_switchdev.c
> > @@ -64,29 +64,27 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
> > {
> > struct switchdev_attr attr = {
> > .orig_dev = p->dev,
> > - .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
> > + .id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
> > + .u.brport_flags = flags,
> > };
> > int err;
> >
> > if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD)
> > return 0;
> >
> > - err = switchdev_port_attr_get(p->dev, &attr);
> > - if (err == -EOPNOTSUPP)
> > - return 0;
> > - if (err)
> > + err = switchdev_port_attr_set(p->dev, &attr);
> > + if (err && err != -EOPNOTSUPP)
> > return err;
> >
> > - /* Check if specific bridge flag attribute offload is supported */
> > - if (!(attr.u.brport_flags_support & mask)) {
> > + if (err == -EOPNOTSUPP) {
> > br_warn(p->br, "bridge flag offload is not supported %u(%s)\n",
> > (unsigned int)p->port_no, p->dev->name);
> > - return -EOPNOTSUPP;
> > + return err;
> > }
>
> I see that you return -EOPNOTSUPP from drivers in case of unsupported
> flags. I believe this is problematic (I'll test soon). The same return
> code is used by:
>
> 1. Switch drivers to indicate unsupported flags
> 2. switchdev code to indicate unsupported netdev (no switchdev ops)
>
> I guess that with this patch any attempt to set bridge port flags on
> veth/dummy device will result in an error.
Yea, that's the case. You can test with
tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh and other
bridge-related tests we have there.
Another problem is that during PORT_PRE_BRIDGE_FLAGS you pass 'flags'
and not 'mask'. This breaks mlxsw (and probably others as well) given
BR_BCAST_FLOOD is set by default.
>
> >
> > attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
> > attr.flags = SWITCHDEV_F_DEFER;
> > - attr.u.brport_flags = flags;
> > +
> > err = switchdev_port_attr_set(p->dev, &attr);
> > if (err) {
> > br_warn(p->br, "error setting offload flag on port %u(%s)\n",
> > --
> > 2.17.1
> >
^ permalink raw reply
* [PATCH net] i40e: fix XDP_REDIRECT/XDP xmit ring cleanup race
From: Björn Töpel @ 2019-02-14 13:03 UTC (permalink / raw)
To: intel-wired-lan
Cc: Björn Töpel, magnus.karlsson, magnus.karlsson, brouer,
netdev, maciej.fijalkowski
From: Björn Töpel <bjorn.topel@intel.com>
When the driver clears the XDP xmit ring due to re-configuration or
teardown, in-progress ndo_xdp_xmit must be taken into consideration.
The ndo_xdp_xmit function is typically called from a napi context that
the driver does not control. Therefore, we must be careful not to
clear the XDP ring, while the call is on-going. This patch adds a
synchronize_rcu() to wait for napi(s) (preempt-disable regions and
softirqs), prior to clearing the queue. Further, the __I40E_CONFIG_BUSY
flag is checked in the ndo_xdp_xmit implementation to avoid touching
the XDP xmit queue during re-configuration.
Fixes: d9314c474d4f ("i40e: add support for XDP_REDIRECT")
Fixes: 123cecd427b6 ("i40e: added queue pair disable/enable functions")
Reported-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_main.c | 14 ++++++++++++--
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 4 +++-
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f52e2c46e6a7..6a54a1e2f4ae 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6725,8 +6725,13 @@ void i40e_down(struct i40e_vsi *vsi)
for (i = 0; i < vsi->num_queue_pairs; i++) {
i40e_clean_tx_ring(vsi->tx_rings[i]);
- if (i40e_enabled_xdp_vsi(vsi))
+ if (i40e_enabled_xdp_vsi(vsi)) {
+ /* Make sure that in-progress ndo_xdp_xmit
+ * calls are completed.
+ */
+ synchronize_rcu();
i40e_clean_tx_ring(vsi->xdp_rings[i]);
+ }
i40e_clean_rx_ring(vsi->rx_rings[i]);
}
@@ -11955,8 +11960,13 @@ static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
{
i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
- if (i40e_enabled_xdp_vsi(vsi))
+ if (i40e_enabled_xdp_vsi(vsi)) {
+ /* Make sure that in-progress ndo_xdp_xmit calls are
+ * completed.
+ */
+ synchronize_rcu();
i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
+ }
i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index a7e14e98889f..6c97667d20ef 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3709,6 +3709,7 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
struct i40e_ring *xdp_ring;
int drops = 0;
int i;
@@ -3716,7 +3717,8 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN;
- if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
+ if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs ||
+ test_bit(__I40E_CONFIG_BUSY, pf->state))
return -ENXIO;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
--
2.19.1
^ permalink raw reply related
* Re: [RFC PATCH net-next 2/5] net: 8021q: vlan_dev: add vid tag for uc and mc address lists
From: Ivan Khoronzhuk @ 2019-02-14 13:03 UTC (permalink / raw)
To: Florian Fainelli; +Cc: davem, linux-omap, netdev, linux-kernel, jiri, andrew
In-Reply-To: <BF485558-C29E-4F25-84B5-4C6C80BDF337@gmail.com>
On Wed, Feb 13, 2019 at 08:49:39PM -0800, Florian Fainelli wrote:
>
>
>On February 13, 2019 8:17:16 AM PST, Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org> wrote:
>>On Tue, Jan 22, 2019 at 03:12:41PM +0200, Ivan Khoronzhuk wrote:
>>>On Mon, Jan 21, 2019 at 03:37:41PM -0800, Florian Fainelli wrote:
>>>>On 12/4/18 3:42 PM, Ivan Khoronzhuk wrote:
>>>>>On Tue, Dec 04, 2018 at 11:49:27AM -0800, Florian Fainelli wrote:
>>>
>>>[...]
>>>
>>>>
>>>>Ivan, based on the recent submission I copied you on [1], it sounds
>>like
>>>>we want to move ahead with your proposal to extend netdev_hw_addr
>>with a
>>>>vid member.
>>>>
>>>>On second thought, your approach is good and if we enclose the vid
>>>>member within an #if IS_ENABLED(CONFIG_VLAN_8021Q) we should be good
>>for
>>>>most foreseeable use cases, if not, we can always introduce a
>>variable
>>>>size/defined context in the future.
>>>>
>>>>Can you resubmit this patch series as non-RFC in the next few days so
>>I
>>>>can also repost mine [1] and take advantage of these changes for
>>>>multicast over VLAN when VLAN filtering is globally enabled on the
>>device.
>>>>
>>>>[1]: https://www.spinics.net/lists/netdev/msg544722.html
>>>>
>>>>Thanks!
>>>
>>>Yes, sure. I can start to do that in several days.
>>>Just a little busy right now.
>>>
>>>Just before doing this, maybe some comments could be added as it has
>>more
>>>attention now. Meanwhile I can send alternative variant but based on
>>>real dev splitting addresses between vlans. In this approach it leaves
>>address
>>>space w/o vid extension but requires more changes to vlan core.
>>Drawback here
>>>that to change one address alg traverses all related vlan addresses,
>>it can be
>>>cpu/time wasteful, if it's done regularly, but saves memory....
>>>
>>>Basically it's implemented locally in cpsw and requires more changes
>>to move
>>>it as some vlan core auxiliary functions to be reused. But it can work
>>only
>>>with vlans directly on top of real dev, which is fixable.
>>>
>>>Core function here:
>>>__hw_addr_ref_sync_dev
>>>it is called only for address the link of which was
>>increased/decreased, thus
>>>update made only on one address, comparing it for every vlan dev.
>>>
>>>It was added with this patch:
>>>[1] net: core: dev_addr_lists: add auxiliary func to handle reference
>>>address update e7946760de5852f32
>>>
>>>And used by this patch:
>>>[2] net: ethernet: ti: cpsw: fix vlan mcast 15180eca569bfe1d4d
>>>
>>>So, idea is to move [2] to be vlan core auxiliary function to be
>>reused
>>>by NIC drivers.
>>>
>>>But potentially it can bring a little more changes I assume:
>>>
>>>1) add priv_flag |= IFF_IV_FLT (independent vlan filtering). It allows
>>to reuse
>>>this flag for further changes, probably for per vlan allmulti or so.
>>>
>>>2) real dev has to have complete list for vlans, not only their vids,
>>but also
>>>all vlandevs in device chain above it. So changes in add_vid can be
>>required.
>>>Vlan core can assign vlan dev pointer to real device only after it's
>>completely
>>>initialized. And for propagation reasons it requires every device in
>>>infrastructure to be aware. That seems doable, but depends not only on
>>me.
>>>
>>>3) Move code from [2] to be auxiliary vlan core API for setting mc and
>>uc.
>>>From this patch only one function is cpsw specific: cpsw_set_mc(). The
>>rest can
>>>be applicable on every NIC supporting IFF_IV_FLT.
>>>
>>>4) Move code from link below to do the same but for uc addresses:
>>>https://git.linaro.org/people/ivan.khoronzhuk/tsn_kernel.git/commit/?h=ucast_vlan_fix&id=ebc88a7d8758759322d9ff88f25f8bac51ce7219
>>>here only one func cpsw specific: cpsw_set_uc()
>>>the rest can be generic.
>>>
>>>As third alternative, we can think about how to reduce memory for
>>addresses by
>>>reusing them or else, but this is as continuation of addr+vid
>>approach, and API
>>>probably would be the same.
>>>
>>>Then all this can be compared for proper decision.
>>
>>
>>Hi Florian,
>>
>>After several more investigations and tries probably better left this
>>idea as is.
>
>Thank you for keeping the thread alive, does that mean you are going to resubmit this patch series as-is (rebased) or are you saying that you are abandoning the idea and leaving the situation the way it is in cpsw?
I will resubmit this one. But:
I have to try one more approach before this.
The idea is to create a simple rx flt device tree during mc/uc sync.
Then use it at real device to dispatch addresses.
It increases hw_addr struct a little and code base,
But:
- no need to keep linearly all vlan addresses in one address space.
- replicates RX filtering struct of net devices,
(but not logical tree of netdevs)
- keeps devs info per address.
- no need to change addr length and modify the existing API
- access at any net dev to above rx flt device structure per address
- potentially can be used not only for vlan devs identification but for
other rx path offloads.
Idea is simple but not completely verified it yet,
need a little bit more time to prove/clean ...or drop it.
>
>>
>>Here actually several explanations for this:
>>1) If even assume that we can get access to vlan devices in the above
>>ndev
>>tree (we can) that doesn't guarantee that receive vlan filters are set
>>replicating this structure. For example bond device can have one active
>>slave
>>but both of them in the tree having vid set, in this case addresses are
>>synced only with the active slave, no filters should be applied to the
>>inactive slave.
>>This can be achieved only if each address has vid context.
>>
>>2) According to 1) rx filters device structure can be created while
>>mc_sync()
>>in each rx_mode(), and then used as orthogonal info. I've tried and it
>>looks
>>not cool and consumes anyway memory and even if it's less it's still
>>not very
>>scalable. (+ no normal signal "in complex structure case" when address
>>should
>>be updated to avoid redundant cpu cycles). Not sure it can have
>>practical
>>results and be universal enough.
>>
>>3) Assuming that every device in the tree (bond, team or else) is legal
>>to
>>modify its own address space, the real end device cannot be sure the
>>vlan device
>>address spaces reflect the vid addresses that the device tree wants from it.
>>According to this each address in address space must hold its own
>>context at
>>every device and this context is comparable with address size.
>>
>>>-- Regards,
>>>Ivan Khoronzhuk
>
>--
>Florian
--
Regards,
Ivan Khoronzhuk
^ permalink raw reply
* Re: [PATCH net-next 3/9] mlxsw: spectrum: Check bridge flags during prepare phase
From: Ido Schimmel @ 2019-02-14 13:11 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev@vger.kernel.org, David S. Miller, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE,
Jiri Pirko, andrew@lunn.ch, vivien.didelot@gmail.com
In-Reply-To: <20190213220638.1552-4-f.fainelli@gmail.com>
On Wed, Feb 13, 2019 at 02:06:32PM -0800, Florian Fainelli wrote:
> In preparation for getting rid of switchdev_port_attr_get(), have mlxsw
> check for the bridge flags being set through switchdev_port_attr_set()
> when the SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS attribute identifier is
> used.
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
> .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 14 +++++++++++---
> 1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
> index 1f492b7dbea8..7616eab50035 100644
> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
> @@ -598,13 +598,17 @@ mlxsw_sp_bridge_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
> static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
> struct switchdev_trans *trans,
> struct net_device *orig_dev,
> - unsigned long brport_flags)
> + unsigned long brport_flags,
> + bool pre_set)
> {
> struct mlxsw_sp_bridge_port *bridge_port;
> int err;
>
> - if (switchdev_trans_ph_prepare(trans))
> + if (switchdev_trans_ph_prepare(trans) && pre_set) {
> + if (brport_flags & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD))
> + return -EOPNOTSUPP;
> return 0;
> + }
When we get SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS we only want to
perform a check. With this code in case it's not prepare phase, then we
continue to set the flags. Better do:
if (pre_set) {
if (switchdev_trans_ph_commit(trans))
return 0;
// perform check here
}
>
> bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge,
> orig_dev);
> @@ -833,6 +837,7 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
> struct switchdev_trans *trans)
> {
> struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
> + bool pre_set = false;
> int err;
>
> switch (attr->id) {
> @@ -841,10 +846,13 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
> attr->orig_dev,
> attr->u.stp_state);
> break;
> + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
> + pre_set = true; /* fall through */
> case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
> err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans,
> attr->orig_dev,
> - attr->u.brport_flags);
> + attr->u.brport_flags,
> + pre_set);
> break;
> case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
> err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans,
> --
> 2.17.1
>
^ permalink raw reply
* [PATCH net-next 1/2] net/mlx5e: Introduce mlx5e_flow_esw_attr_init() helper
From: xiangxia.m.yue @ 2019-02-12 3:39 UTC (permalink / raw)
To: saeedm, gerlitz.or; +Cc: netdev, Tonghao Zhang
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Introduce the mlx5e_flow_esw_attr_init() helper
to simplify the code.
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 41 +++++++++++++++++--------
1 file changed, 29 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index e943775..98b002c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2736,6 +2736,30 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
return err;
}
+static void
+mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
+ struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct tc_cls_flower_offload *f,
+ struct mlx5_eswitch_rep *in_rep,
+ struct mlx5_core_dev *in_mdev)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ esw_attr->parse_attr = parse_attr;
+ esw_attr->chain = f->common.chain_index;
+ esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
+
+ esw_attr->in_rep = in_rep;
+ esw_attr->in_mdev = in_mdev;
+
+ if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
+ MLX5_COUNTER_SOURCE_ESWITCH)
+ esw_attr->counter_dev = in_mdev;
+ else
+ esw_attr->counter_dev = priv->mdev;
+}
+
static struct mlx5e_tc_flow *
__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
@@ -2757,28 +2781,21 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
&parse_attr, &flow);
if (err)
goto out;
+
parse_attr->filter_dev = filter_dev;
- flow->esw_attr->parse_attr = parse_attr;
+ mlx5e_flow_esw_attr_init(flow->esw_attr,
+ priv, parse_attr,
+ f, in_rep, in_mdev);
+
err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
f, filter_dev);
if (err)
goto err_free;
- flow->esw_attr->chain = f->common.chain_index;
- flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack);
if (err)
goto err_free;
- flow->esw_attr->in_rep = in_rep;
- flow->esw_attr->in_mdev = in_mdev;
-
- if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
- MLX5_COUNTER_SOURCE_ESWITCH)
- flow->esw_attr->counter_dev = in_mdev;
- else
- flow->esw_attr->counter_dev = priv->mdev;
-
err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
if (err)
goto err_free;
--
1.8.3.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox