* [PATCH 2/2] net: mvpp2: support multiple comphy lanes
From: Matt Pelland @ 2019-08-01 20:45 UTC (permalink / raw)
To: netdev; +Cc: davem, maxime.chevallier, antoine.tenart, Matt Pelland
In-Reply-To: <20190801204523.26454-1-mpelland@starry.com>
mvpp 2.2 supports RXAUI which requires a pair of serdes lanes instead of
the usual single lane required by other interface modes. This patch
expands the number of lanes that can be associated to a port so that
both lanes are correctly configured at the appropriate times when RXAUI
is in use.
Signed-off-by: Matt Pelland <mpelland@starry.com>
---
drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 7 +-
.../net/ethernet/marvell/mvpp2/mvpp2_main.c | 66 +++++++++++++------
2 files changed, 53 insertions(+), 20 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 256e7c796631..9ae2b3d9d0c7 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -655,6 +655,11 @@
#define MVPP2_F_LOOPBACK BIT(0)
#define MVPP2_F_DT_COMPAT BIT(1)
+/* MVPP22 supports RXAUI which requires two comphy lanes, all other modes
+ * require one.
+ */
+#define MVPP22_MAX_COMPHYS 2
+
/* Marvell tag types */
enum mvpp2_tag_type {
MVPP2_TAG_TYPE_NONE = 0,
@@ -935,7 +940,7 @@ struct mvpp2_port {
phy_interface_t phy_interface;
struct phylink *phylink;
struct phylink_config phylink_config;
- struct phy *comphy;
+ struct phy *comphys[MVPP22_MAX_COMPHYS];
struct mvpp2_bm_pool *pool_long;
struct mvpp2_bm_pool *pool_short;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 8b633af4a684..97dfe2e71b03 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -1186,17 +1186,40 @@ static void mvpp22_gop_setup_irq(struct mvpp2_port *port)
*/
static int mvpp22_comphy_init(struct mvpp2_port *port)
{
- int ret;
+ int i, ret;
- if (!port->comphy)
- return 0;
+ for (i = 0; i < ARRAY_SIZE(port->comphys); i++) {
+ if (!port->comphys[i])
+ return 0;
- ret = phy_set_mode_ext(port->comphy, PHY_MODE_ETHERNET,
- port->phy_interface);
- if (ret)
- return ret;
+ ret = phy_set_mode_ext(port->comphys[i],
+ PHY_MODE_ETHERNET,
+ port->phy_interface);
+ if (ret)
+ return ret;
- return phy_power_on(port->comphy);
+ ret = phy_power_on(port->comphys[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int mvpp22_comphy_deinit(struct mvpp2_port *port)
+{
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(port->comphys); i++) {
+ if (!port->comphys[i])
+ return 0;
+
+ ret = phy_power_off(port->comphys[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
static void mvpp2_port_enable(struct mvpp2_port *port)
@@ -3375,7 +3398,9 @@ static void mvpp2_stop_dev(struct mvpp2_port *port)
if (port->phylink)
phylink_stop(port->phylink);
- phy_power_off(port->comphy);
+
+ if (port->priv->hw_version == MVPP22)
+ mvpp22_comphy_deinit(port);
}
static int mvpp2_check_ringparam_valid(struct net_device *dev,
@@ -4947,7 +4972,7 @@ static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
port->phy_interface = state->interface;
/* Reconfigure the serdes lanes */
- phy_power_off(port->comphy);
+ mvpp22_comphy_deinit(port);
mvpp22_mode_reconfigure(port);
}
@@ -5038,7 +5063,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
struct fwnode_handle *port_fwnode,
struct mvpp2 *priv)
{
- struct phy *comphy = NULL;
struct mvpp2_port *port;
struct mvpp2_port_pcpu *port_pcpu;
struct device_node *port_node = to_of_node(port_fwnode);
@@ -5085,14 +5109,20 @@ static int mvpp2_port_probe(struct platform_device *pdev,
goto err_free_netdev;
}
+ port = netdev_priv(dev);
+
if (port_node) {
- comphy = devm_of_phy_get(&pdev->dev, port_node, NULL);
- if (IS_ERR(comphy)) {
- if (PTR_ERR(comphy) == -EPROBE_DEFER) {
- err = -EPROBE_DEFER;
- goto err_free_netdev;
+ for (i = 0; i < ARRAY_SIZE(port->comphys); i++) {
+ port->comphys[i] = devm_of_phy_get_by_index(&pdev->dev,
+ port_node,
+ i);
+ if (IS_ERR(port->comphys[i])) {
+ err = PTR_ERR(port->comphys[i]);
+ port->comphys[i] = NULL;
+ if (err == -EPROBE_DEFER)
+ goto err_free_netdev;
+ err = 0;
}
- comphy = NULL;
}
}
@@ -5107,7 +5137,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
dev->netdev_ops = &mvpp2_netdev_ops;
dev->ethtool_ops = &mvpp2_eth_tool_ops;
- port = netdev_priv(dev);
port->dev = dev;
port->fwnode = port_fwnode;
port->has_phy = !!of_find_property(port_node, "phy", NULL);
@@ -5144,7 +5173,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
port->of_node = port_node;
port->phy_interface = phy_mode;
- port->comphy = comphy;
if (priv->hw_version == MVPP21) {
port->base = devm_platform_ioremap_resource(pdev, 2 + id);
--
2.21.0
^ permalink raw reply related
* [PATCH 1/2] net: mvpp2: implement RXAUI support
From: Matt Pelland @ 2019-08-01 20:45 UTC (permalink / raw)
To: netdev; +Cc: davem, maxime.chevallier, antoine.tenart, Matt Pelland
In-Reply-To: <20190801204523.26454-1-mpelland@starry.com>
Marvell's mvpp2 packet processor supports RXAUI on port zero in a
similar manner to the existing 10G protocols that have already been
implemented. This patch implements the miscellaneous extra configuration
steps required for RXAUI operation.
Signed-off-by: Matt Pelland <mpelland@starry.com>
---
drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 1 +
.../net/ethernet/marvell/mvpp2/mvpp2_main.c | 30 +++++++++++++++++++
2 files changed, 31 insertions(+)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 4d9564ba68f6..256e7c796631 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -481,6 +481,7 @@
#define MVPP22_XLG_CTRL4_REG 0x184
#define MVPP22_XLG_CTRL4_FWD_FC BIT(5)
#define MVPP22_XLG_CTRL4_FWD_PFC BIT(6)
+#define MVPP22_XLG_CTRL4_USE_XPCS BIT(8)
#define MVPP22_XLG_CTRL4_MACMODSELECT_GMAC BIT(12)
#define MVPP22_XLG_CTRL4_EN_IDLE_CHECK BIT(14)
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index e9d8ffe897e9..8b633af4a684 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -968,6 +968,7 @@ mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask)
static bool mvpp2_is_xlg(phy_interface_t interface)
{
return interface == PHY_INTERFACE_MODE_10GKR ||
+ interface == PHY_INTERFACE_MODE_RXAUI ||
interface == PHY_INTERFACE_MODE_XAUI;
}
@@ -1008,6 +1009,27 @@ static void mvpp22_gop_init_sgmii(struct mvpp2_port *port)
}
}
+static void mvpp22_gop_init_rxaui(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ void __iomem *xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id);
+ u32 val;
+
+ val = readl(xpcs + MVPP22_XPCS_CFG0);
+ val &= ~MVPP22_XPCS_CFG0_RESET_DIS;
+ writel(val, xpcs + MVPP22_XPCS_CFG0);
+
+ val = readl(xpcs + MVPP22_XPCS_CFG0);
+ val &= ~(MVPP22_XPCS_CFG0_PCS_MODE(0x3) |
+ MVPP22_XPCS_CFG0_ACTIVE_LANE(0x3));
+ val |= MVPP22_XPCS_CFG0_ACTIVE_LANE(2);
+ writel(val, xpcs + MVPP22_XPCS_CFG0);
+
+ val = readl(xpcs + MVPP22_XPCS_CFG0);
+ val |= MVPP22_XPCS_CFG0_RESET_DIS;
+ writel(val, xpcs + MVPP22_XPCS_CFG0);
+}
+
static void mvpp22_gop_init_10gkr(struct mvpp2_port *port)
{
struct mvpp2 *priv = port->priv;
@@ -1053,6 +1075,9 @@ static int mvpp22_gop_init(struct mvpp2_port *port)
case PHY_INTERFACE_MODE_2500BASEX:
mvpp22_gop_init_sgmii(port);
break;
+ case PHY_INTERFACE_MODE_RXAUI:
+ mvpp22_gop_init_rxaui(port);
+ break;
case PHY_INTERFACE_MODE_10GKR:
if (port->gop_id != 0)
goto invalid_conf;
@@ -4570,6 +4595,7 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
switch (state->interface) {
case PHY_INTERFACE_MODE_10GKR:
case PHY_INTERFACE_MODE_XAUI:
+ case PHY_INTERFACE_MODE_RXAUI:
if (port->gop_id != 0)
goto empty_set;
break;
@@ -4592,6 +4618,7 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
switch (state->interface) {
case PHY_INTERFACE_MODE_10GKR:
case PHY_INTERFACE_MODE_XAUI:
+ case PHY_INTERFACE_MODE_RXAUI:
case PHY_INTERFACE_MODE_NA:
if (port->gop_id == 0) {
phylink_set(mask, 10000baseT_Full);
@@ -4744,6 +4771,9 @@ static void mvpp2_xlg_config(struct mvpp2_port *port, unsigned int mode,
ctrl4 |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC |
MVPP22_XLG_CTRL4_EN_IDLE_CHECK;
+ if (state->interface == PHY_INTERFACE_MODE_RXAUI)
+ ctrl4 |= MVPP22_XLG_CTRL4_USE_XPCS;
+
if (old_ctrl0 != ctrl0)
writel(ctrl0, port->base + MVPP22_XLG_CTRL0_REG);
if (old_ctrl4 != ctrl4)
--
2.21.0
^ permalink raw reply related
* [PATCH] net: mvpp2: Implement RXAUI Support
From: Matt Pelland @ 2019-08-01 20:45 UTC (permalink / raw)
To: netdev; +Cc: davem, maxime.chevallier, antoine.tenart
This patch set implements support for configuring Marvell's mvpp2 hardware for
RXAUI operation. There are two other patches necessary for this to work
correctly that concern Marvell's cp110 comphy that were emailed to the general
linux-kernel mailing list earlier on. I can post them here if need be. This
patch set was successfully tested on a Marvell Armada 7040 based platform.
Cheers,
Matt
^ permalink raw reply
* Re: [PATCH bpf 1/2] bpf: fix x64 JIT code generation for jmp to 1st insn
From: Alexei Starovoitov @ 2019-08-01 20:44 UTC (permalink / raw)
To: Song Liu
Cc: Alexei Starovoitov, David S. Miller, Daniel Borkmann,
netdev@vger.kernel.org, bpf@vger.kernel.org, Kernel Team
In-Reply-To: <CAADnVQJu9s4a=tc0+C5hgSPX4KnpYDzKu0AxxU4nCoU1QaWVEg@mail.gmail.com>
On Wed, Jul 31, 2019 at 8:43 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Wed, Jul 31, 2019 at 12:36 PM Song Liu <songliubraving@fb.com> wrote:
> >
> >
> >
> > > On Jul 30, 2019, at 6:38 PM, Alexei Starovoitov <ast@kernel.org> wrote:
> > >
> > > Introduction of bounded loops exposed old bug in x64 JIT.
> > > JIT maintains the array of offsets to the end of all instructions to
> > > compute jmp offsets.
> > > addrs[0] - offset of the end of the 1st insn (that includes prologue).
> > > addrs[1] - offset of the end of the 2nd insn.
> > > JIT didn't keep the offset of the beginning of the 1st insn,
> > > since classic BPF didn't have backward jumps and valid extended BPF
> > > couldn't have a branch to 1st insn, because it didn't allow loops.
> > > With bounded loops it's possible to construct a valid program that
> > > jumps backwards to the 1st insn.
> > > Fix JIT by computing:
> > > addrs[0] - offset of the end of prologue == start of the 1st insn.
> > > addrs[1] - offset of the end of 1st insn.
> > >
> > > Reported-by: syzbot+35101610ff3e83119b1b@syzkaller.appspotmail.com
> > > Fixes: 2589726d12a1 ("bpf: introduce bounded loops")
> > > Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64")
> > > Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> >
> > Acked-by: Song Liu <songliubraving@fb.com>
> >
> > Do we need similar fix for x86_32?
>
> Right. x86_32 would need similar fix.
>
> Applied to bpf tree.
Yonghong noticed that it subtly changes jited linfo.
Surprisingly perf annotated output for source code in jited bpf progs
looks exactly the same for several large bpf progs that I've looked at.
This is to be investigated later.
I've applied the fix:
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a56c95805732..991549a1c5f3 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1181,7 +1181,7 @@ struct bpf_prog *bpf_int_jit_compile(struct
bpf_prog *prog)
if (!image || !prog->is_func || extra_pass) {
if (image)
- bpf_prog_fill_jited_linfo(prog, addrs);
+ bpf_prog_fill_jited_linfo(prog, addrs + 1);
out_addrs:
kfree(addrs);
kfree(jit_data);
and re-pushed bpf tree.
The new commit is here:
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/commit/?id=7c2e988f400e83501e0a3568250780609b7c8263
Thanks Yonghong!
For bpf-next we need to figure out how to make test_btf more robust.
We can probably check first few insns for specific jited offsets,
but I don't yet see how to make it work for all archs.
And it will be annoying to keep it working with every change to jit.
^ permalink raw reply related
* Re: [PATCH net-next 0/5] net: dsa: mv88e6xxx: avoid some redundant VTU operations
From: David Miller @ 2019-08-01 20:43 UTC (permalink / raw)
To: vivien.didelot; +Cc: linux-kernel, rasmus.villemoes, f.fainelli, andrew, netdev
In-Reply-To: <20190801183637.24841-1-vivien.didelot@gmail.com>
From: Vivien Didelot <vivien.didelot@gmail.com>
Date: Thu, 1 Aug 2019 14:36:32 -0400
> The mv88e6xxx driver currently uses a mv88e6xxx_vtu_get wrapper to get a
> single entry and uses a boolean to eventually initialize a fresh one.
>
> However the fresh entry is only needed in one place and mv88e6xxx_vtu_getnext
> is simple enough to call it directly. Doing so makes the code easier to read,
> especially for the return code expected by switchdev to honor software VLANs.
>
> In addition to not loading the VTU again when an entry is already correctly
> programmed, this also allows to avoid programming the broadcast entries
> again when updating a port's membership, from e.g. tagged to untagged.
>
> This patch series removes the mv88e6xxx_vtu_get wrapper in favor of direct
> calls to mv88e6xxx_vtu_getnext, and also renames the _mv88e6xxx_port_vlan_add
> and _mv88e6xxx_port_vlan_del helpers using an old underscore prefix convention.
>
> In case the port's membership is already correctly programmed in hardware,
> the following debug message may be printed:
>
> [ 745.989884] mv88e6085 2188000.ethernet-1:00: p4: already a member of VLAN 42
Series applied, thanks Vivien.
^ permalink raw reply
* Re: [PATCH] net: bridge: Allow bridge to joing multicast groups
From: Vivien Didelot @ 2019-08-01 20:08 UTC (permalink / raw)
To: Horatiu Vultur
Cc: roopa, nikolay, davem, bridge, netdev, linux-kernel,
allan.nielsen
In-Reply-To: <20190801194801.rqv5jvb5vxjo2dor@soft-dev3.microsemi.net>
Hi Horatiu,
On Thu, 1 Aug 2019 21:48:02 +0200, Horatiu Vultur <horatiu.vultur@microchip.com> wrote:
> > I'm a bit late in the conversation. Isn't this what you want?
> >
> > ip address add <multicast IPv4 address> dev br0 autojoin
> >
>
> Not really, I was looking for a way to register the ports to link layer
> multicast address. Sorry for the confusion, my description of the patch
> was totally misleading.
>
> If you follow this thread you will get a better idea what we wanted to
> achieve. We got some really good comments and based on these we sent an
> RFC[1].
OK great! Keep me in the loop, I enjoy bridge and multicast very much ;-)
Thanks,
Vivien
^ permalink raw reply
* [net-next 12/12] net/mlx5e: Allow dropping specific tunnel packets
From: Saeed Mahameed @ 2019-08-01 19:57 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Tonghao Zhang, Roi Dayan, Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
In some cases, we don't want to allow specific tunnel packets to reach
the host, so that they do not take up a lot of CPU (e.g. network attacks).
Other tunnel packets which are not matched in hardware will still be
sent to the host.
$ tc filter add dev vxlan_sys_4789 \
protocol ip chain 0 parent ffff: prio 1 handle 1 \
flower dst_ip 1.1.1.100 ip_proto tcp dst_port 80 \
enc_dst_ip 2.2.2.100 enc_key_id 100 enc_dst_port 4789 \
action tunnel_key unset pipe action drop
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index dc5fc3350b65..c5d75e2ecf54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2485,7 +2485,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
if (flow_flag_test(flow, EGRESS) &&
!((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
- (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)))
+ (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
+ (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
return false;
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
--
2.21.0
^ permalink raw reply related
* [net-next 11/12] net/mlx5e: TX reporter cleanup
From: Saeed Mahameed @ 2019-08-01 19:57 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Aya Levin <ayal@mellanox.com>
Remove redundant include files.
Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h | 1 -
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 1 -
2 files changed, 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
index e78e92753d73..ed7a3881d2c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
@@ -4,7 +4,6 @@
#ifndef __MLX5E_EN_REPORTER_H
#define __MLX5E_EN_REPORTER_H
-#include <linux/mlx5/driver.h>
#include "en.h"
int mlx5e_tx_reporter_create(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index f1c652f75718..6e54fefea410 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Mellanox Technologies. */
-#include <net/devlink.h>
#include "reporter.h"
#include "lib/eq.h"
--
2.21.0
^ permalink raw reply related
* [net-next 10/12] net/mlx5e: Set tx reporter only on successful creation
From: Saeed Mahameed @ 2019-08-01 19:57 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Aya Levin <ayal@mellanox.com>
When failing to create tx reporter, don't set the reporter's pointer.
Creating a reporter is not mandatory for driver load, avoid
garbage/error pointer.
Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 14 ++++++++------
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 383ecfd85d8a..f1c652f75718 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -117,7 +117,7 @@ static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter,
char *err_str,
struct mlx5e_tx_err_ctx *err_ctx)
{
- if (IS_ERR_OR_NULL(tx_reporter)) {
+ if (!tx_reporter) {
netdev_err(err_ctx->sq->channel->netdev, err_str);
return err_ctx->recover(err_ctx->sq);
}
@@ -289,25 +289,27 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
{
+ struct devlink_health_reporter *reporter;
struct mlx5_core_dev *mdev = priv->mdev;
struct devlink *devlink = priv_to_devlink(mdev);
- priv->tx_reporter =
+ reporter =
devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
MLX5_REPORTER_TX_GRACEFUL_PERIOD,
true, priv);
- if (IS_ERR(priv->tx_reporter)) {
+ if (IS_ERR(reporter)) {
netdev_warn(priv->netdev,
"Failed to create tx reporter, err = %ld\n",
- PTR_ERR(priv->tx_reporter));
- return PTR_ERR(priv->tx_reporter);
+ PTR_ERR(reporter));
+ return PTR_ERR(reporter);
}
+ priv->tx_reporter = reporter;
return 0;
}
void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
{
- if (IS_ERR_OR_NULL(priv->tx_reporter))
+ if (!priv->tx_reporter)
return;
devlink_health_reporter_destroy(priv->tx_reporter);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e75cb18c2256..4db595a7eb03 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2325,7 +2325,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
goto err_close_channels;
}
- if (!IS_ERR_OR_NULL(priv->tx_reporter))
+ if (priv->tx_reporter)
devlink_health_reporter_state_update(priv->tx_reporter,
DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
--
2.21.0
^ permalink raw reply related
* [net-next 06/12] net/mlx5e: XDP, Slight enhancement for WQE fetch function
From: Saeed Mahameed @ 2019-08-01 19:57 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev@vger.kernel.org, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Tariq Toukan <tariqt@mellanox.com>
Instead of passing an output param, let function return the
WQE pointer.
In addition, pass &pi so it gets its value in the function,
and save the redundant assignment that comes after it.
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +---
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 13 ++++++++-----
2 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 8cb98326531f..1ed5c33e022f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -187,14 +187,12 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS))
mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
- mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
+ session->wqe = mlx5e_xdpsq_fetch_wqe(sq, &pi);
prefetchw(session->wqe->data);
session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
session->pkt_count = 0;
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-
mlx5e_xdp_update_inline_state(sq);
stats->mpwqe++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index e0ed7710f5f1..36ac1e3816b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -190,14 +190,17 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
session->ds_count++;
}
-static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq,
- struct mlx5e_tx_wqe **wqe)
+static inline struct mlx5e_tx_wqe *
+mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, u16 *pi)
{
struct mlx5_wq_cyc *wq = &sq->wq;
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_tx_wqe *wqe;
- *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
- memset(*wqe, 0, sizeof(**wqe));
+ *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+ memset(wqe, 0, sizeof(*wqe));
+
+ return wqe;
}
static inline void
--
2.21.0
^ permalink raw reply related
* [net-next 09/12] net/mlx5e: Fix mlx5e_tx_reporter_create return value
From: Saeed Mahameed @ 2019-08-01 19:57 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Aya Levin, Tariq Toukan, Jiri Pirko,
Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Aya Levin <ayal@mellanox.com>
Return an error when failing to create a reporter in devlink. Since
NET_DEVLINK is mandatory for MLX5_CORE in Kconfig, the returned pointer
can't be NULL and can only hold an error in the bad path.
Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index f3d98748b211..383ecfd85d8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -296,11 +296,13 @@ int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
MLX5_REPORTER_TX_GRACEFUL_PERIOD,
true, priv);
- if (IS_ERR(priv->tx_reporter))
+ if (IS_ERR(priv->tx_reporter)) {
netdev_warn(priv->netdev,
"Failed to create tx reporter, err = %ld\n",
PTR_ERR(priv->tx_reporter));
- return IS_ERR_OR_NULL(priv->tx_reporter);
+ return PTR_ERR(priv->tx_reporter);
+ }
+ return 0;
}
void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
--
2.21.0
^ permalink raw reply related
* [net-next 08/12] net/mlx5e: Rx, checksum handling refactoring
From: Saeed Mahameed @ 2019-08-01 19:57 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev@vger.kernel.org, Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
Move vlan checksum fixup flow into mlx5e_skb_padding_csum(), which is
supposed to fixup SKB checksum if needed. And rename
mlx5e_skb_padding_csum() to mlx5e_skb_csum_fixup().
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/en_rx.c | 26 ++++++++++---------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index ac6e586d403d..60570b442fff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -859,13 +859,24 @@ tail_padding_csum(struct sk_buff *skb, int offset,
}
static void
-mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto,
- struct mlx5e_rq_stats *stats)
+mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto,
+ struct mlx5e_rq_stats *stats)
{
struct ipv6hdr *ip6;
struct iphdr *ip4;
int pkt_len;
+ /* Fixup vlan headers, if any */
+ if (network_depth > ETH_HLEN)
+ /* CQE csum is calculated from the IP header and does
+ * not cover VLAN headers (if present). This will add
+ * the checksum manually.
+ */
+ skb->csum = csum_partial(skb->data + ETH_HLEN,
+ network_depth - ETH_HLEN,
+ skb->csum);
+
+ /* Fixup tail padding, if any */
switch (proto) {
case htons(ETH_P_IP):
ip4 = (struct iphdr *)(skb->data + network_depth);
@@ -931,16 +942,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
return; /* CQE csum covers all received bytes */
/* csum might need some fixups ...*/
- if (network_depth > ETH_HLEN)
- /* CQE csum is calculated from the IP header and does
- * not cover VLAN headers (if present). This will add
- * the checksum manually.
- */
- skb->csum = csum_partial(skb->data + ETH_HLEN,
- network_depth - ETH_HLEN,
- skb->csum);
-
- mlx5e_skb_padding_csum(skb, network_depth, proto, stats);
+ mlx5e_skb_csum_fixup(skb, network_depth, proto, stats);
return;
}
--
2.21.0
^ permalink raw reply related
* [net-next 07/12] net/mlx5e: Tx, Soften inline mode VLAN dependencies
From: Saeed Mahameed @ 2019-08-01 19:57 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Tariq Toukan, Noam Stolero,
Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Tariq Toukan <tariqt@mellanox.com>
If capable, use zero inline mode in TX WQE for non-VLAN packets.
For VLAN ones, keep the enforcement of at least L2 inline mode,
unless the WQE VLAN insertion offload cap is on.
Performance:
Tested single core packet rate of 64Bytes.
NIC: ConnectX-5
CPU: Intel(R) Xeon(R) Gold 6154 CPU @ 3.00GHz
pktgen:
Before: 12.46 Mpps
After: 14.65 Mpps (+17.5%)
XDP_TX:
The MPWQE flow is not affected, as it already has this optimization.
So we test with priv-flag xdp_tx_mpwqe: off.
Before: 9.90 Mpps
After: 10.20 Mpps (+3%)
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Tested-by: Noam Stolero <noams@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +-
.../net/ethernet/mellanox/mlx5/core/en/txrx.h | 22 +++++++++++++++++--
.../ethernet/mellanox/mlx5/core/en_common.c | 12 ----------
.../ethernet/mellanox/mlx5/core/en_dcbnl.c | 2 +-
.../net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++-
.../net/ethernet/mellanox/mlx5/core/en_tx.c | 7 +++---
.../net/ethernet/mellanox/mlx5/core/vport.c | 7 +++---
7 files changed, 33 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 745bcc25c6f8..30f13f81c965 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -359,6 +359,7 @@ enum {
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_AM,
MLX5E_SQ_STATE_TLS,
+ MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
};
struct mlx5e_sq_wqe_info {
@@ -1132,7 +1133,6 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
u16 num_channels);
-u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
void mlx5e_rx_dim_work(struct work_struct *work);
void mlx5e_tx_dim_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 7da22b413a48..87be96747902 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -117,9 +117,27 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
mlx5_write64((__be32 *)ctrl, uar_map);
}
-static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe)
+static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg)
{
- return !!wqe->ctrl.tisn;
+ return cseg && !!cseg->tisn;
+}
+
+static inline u8
+mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
+ struct sk_buff *skb)
+{
+ u8 mode;
+
+ if (mlx5e_transport_inline_tx_wqe(cseg))
+ return MLX5_INLINE_MODE_TCP_UDP;
+
+ mode = sq->min_inline_mode;
+
+ if (skb_vlan_tag_present(skb) &&
+ test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
+ mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
+
+ return mode;
}
static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 1539cf3de5dc..f7890e0ce96c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -180,15 +180,3 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
return err;
}
-
-u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev)
-{
- u8 min_inline_mode;
-
- mlx5_query_min_inline(mdev, &min_inline_mode);
- if (min_inline_mode == MLX5_INLINE_MODE_NONE &&
- !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
- min_inline_mode = MLX5_INLINE_MODE_L2;
-
- return min_inline_mode;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 8dd31b5c740c..01f2918063af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -1101,7 +1101,7 @@ void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv)
static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv,
struct mlx5e_params *params)
{
- params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev);
+ mlx5_query_min_inline(priv->mdev, &params->tx_min_inline_mode);
if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP &&
params->tx_min_inline_mode == MLX5_INLINE_MODE_L2)
params->tx_min_inline_mode = MLX5_INLINE_MODE_IP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b2618dd6dd10..e75cb18c2256 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1131,6 +1131,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
sq->stop_room = MLX5E_SQ_STOP_ROOM;
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
+ if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+ set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
if (mlx5_accel_is_tls_device(c->priv->mdev)) {
@@ -4777,7 +4779,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
/* TX inline */
- params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
+ mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
/* RSS */
mlx5e_build_rss_params(rss_params, params->num_channels);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index acf25cc38fa1..d3a67a9b4eba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -292,8 +292,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
stats->packets += skb_shinfo(skb)->gso_segs;
} else {
- u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ?
- MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode;
+ u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb);
opcode = MLX5_OPCODE_SEND;
mss = 0;
@@ -608,9 +607,11 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
stats->packets += skb_shinfo(skb)->gso_segs;
} else {
+ u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb);
+
opcode = MLX5_OPCODE_SEND;
mss = 0;
- ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+ ihs = mlx5e_calc_min_inline(mode, skb);
num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
stats->packets++;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index c912d82ca64b..30f7848a6f88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -122,12 +122,13 @@ void mlx5_query_min_inline(struct mlx5_core_dev *mdev,
u8 *min_inline_mode)
{
switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ if (!mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode))
+ break;
+ /* fall through */
case MLX5_CAP_INLINE_MODE_L2:
*min_inline_mode = MLX5_INLINE_MODE_L2;
break;
- case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
- mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode);
- break;
case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
*min_inline_mode = MLX5_INLINE_MODE_NONE;
break;
--
2.21.0
^ permalink raw reply related
* [net-next 05/12] net/mlx5e: XDP, Close TX MPWQE session when no room for inline packet left
From: Saeed Mahameed @ 2019-08-01 19:56 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Shay Agroskin, Tariq Toukan,
Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Shay Agroskin <shayag@mellanox.com>
In MPWQE mode, when transmitting packets with XDP, a packet that is smaller
than a certain size (set to 256 bytes) would be sent inline within its WQE
TX descriptor (mem-copied), in case the hardware tx queue is congested
beyond a pre-defined water-mark.
If a MPWQE cannot contain an additional inline packet, we close this
MPWQE session, and send the packet inlined within the next MPWQE.
To save some MPWQE session close+open operations, we don't open MPWQE
sessions that are contiguously smaller than a certain size (set to the
HW MPWQE maximum size). If there isn't enough contiguous room in the
send queue, we fill it with NOPs and wrap the send queue index around.
This way, qualified packets are always sent inline.
Perf tests:
Tested packet rate for UDP 64Byte multi-stream
over two dual port ConnectX-5 100Gbps NICs.
CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
XDP_TX:
With 24 channels:
| ------ | bounced packets | inlined packets | inline ratio |
| before | 113.6Mpps | 96.3Mpps | 84% |
| after | 115Mpps | 99.5Mpps | 86% |
With one channel:
| ------ | bounced packets | inlined packets | inline ratio |
| before | 6.7Mpps | 0pps | 0% |
| after | 6.8Mpps | 0pps | 0% |
As we can see, there is improvement in both inline ratio and overall
packet rate for 24 channels. Also, we see no degradation for the
one-channel case.
Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 -
.../net/ethernet/mellanox/mlx5/core/en/xdp.c | 32 ++++-------
.../net/ethernet/mellanox/mlx5/core/en/xdp.h | 53 +++++++++++++++----
.../ethernet/mellanox/mlx5/core/en_stats.c | 6 +++
.../ethernet/mellanox/mlx5/core/en_stats.h | 3 ++
5 files changed, 63 insertions(+), 33 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 79d93d6c7d7a..745bcc25c6f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -483,8 +483,6 @@ struct mlx5e_xdp_mpwqe {
struct mlx5e_tx_wqe *wqe;
u8 ds_count;
u8 pkt_count;
- u8 max_ds_count;
- u8 complete;
u8 inline_on;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index b0b982cf69bb..8cb98326531f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -179,34 +179,22 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
struct mlx5_wq_cyc *wq = &sq->wq;
- u8 wqebbs;
- u16 pi;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+
+ if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS))
+ mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
prefetchw(session->wqe->data);
session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
session->pkt_count = 0;
- session->complete = 0;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
- * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
- * full-session WQE be cache-aligned.
- */
-#if L1_CACHE_BYTES < 128
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#else
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
-#endif
-
- wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
- MLX5E_XDP_MPW_MAX_WQEBBS);
-
- session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
-
mlx5e_xdp_update_inline_state(sq);
stats->mpwqe++;
@@ -244,7 +232,7 @@ static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
if (unlikely(!sq->mpwqe.wqe)) {
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5_SEND_WQE_MAX_WQEBBS))) {
+ MLX5E_XDPSQ_STOP_ROOM))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -285,8 +273,8 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
- if (unlikely(session->complete ||
- session->ds_count == session->max_ds_count))
+ if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
+ session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index b90923932668..e0ed7710f5f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -40,6 +40,26 @@
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
+#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM)
+
+#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
+#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
+ DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
+
+/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
+ * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
+ * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
+ * full-session WQE be cache-aligned.
+ */
+#if L1_CACHE_BYTES < 128
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+#else
+#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
+#endif
+
+#define MLX5E_XDP_MPW_MAX_NUM_DS \
+ (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
@@ -114,6 +134,30 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
session->inline_on = 1;
}
+static inline bool
+mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
+{
+ return session->inline_on &&
+ session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
+}
+
+static inline void
+mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq,
+ u16 pi, u16 nnops)
+{
+ struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
+
+ edge_wi = wi + nnops;
+ /* fill sq frag edge with nops to avoid wqe wrapping two pages */
+ for (; wi < edge_wi; wi++) {
+ wi->num_wqebbs = 1;
+ wi->num_pkts = 0;
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+
+ sq->stats->nops += nnops;
+}
+
static inline void
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_xmit_data *xdptxd,
@@ -126,20 +170,12 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
session->pkt_count++;
-#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
-
if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
struct mlx5_wqe_inline_seg *inline_dseg =
(struct mlx5_wqe_inline_seg *)dseg;
u16 ds_len = sizeof(*inline_dseg) + dma_len;
u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS);
- if (unlikely(session->ds_count + ds_cnt > session->max_ds_count)) {
- /* Not enough space for inline wqe, send with memory pointer */
- session->complete = true;
- goto no_inline;
- }
-
inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG);
memcpy(inline_dseg->data, xdptxd->data, dma_len);
@@ -148,7 +184,6 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
return;
}
-no_inline:
dseg->addr = cpu_to_be64(xdptxd->dma_addr);
dseg->byte_count = cpu_to_be32(dma_len);
dseg->lkey = sq->mkey_be;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 539b4d3656da..6eee3c7d4b06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -74,6 +74,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_nops) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) },
@@ -90,6 +91,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_nops) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) },
@@ -200,6 +202,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->rx_xdp_tx_xmit += xdpsq_stats->xmit;
s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe;
s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw;
+ s->rx_xdp_tx_nops += xdpsq_stats->nops;
s->rx_xdp_tx_full += xdpsq_stats->full;
s->rx_xdp_tx_err += xdpsq_stats->err;
s->rx_xdp_tx_cqe += xdpsq_stats->cqes;
@@ -227,6 +230,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_xdp_xmit += xdpsq_red_stats->xmit;
s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe;
s->tx_xdp_inlnw += xdpsq_red_stats->inlnw;
+ s->tx_xdp_nops += xdpsq_red_stats->nops;
s->tx_xdp_full += xdpsq_red_stats->full;
s->tx_xdp_err += xdpsq_red_stats->err;
s->tx_xdp_cqes += xdpsq_red_stats->cqes;
@@ -1331,6 +1335,7 @@ static const struct counter_desc rq_xdpsq_stats_desc[] = {
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
@@ -1340,6 +1345,7 @@ static const struct counter_desc xdpsq_stats_desc[] = {
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 76ac111e14d0..bf645d42c833 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -81,6 +81,7 @@ struct mlx5e_sw_stats {
u64 rx_xdp_tx_xmit;
u64 rx_xdp_tx_mpwqe;
u64 rx_xdp_tx_inlnw;
+ u64 rx_xdp_tx_nops;
u64 rx_xdp_tx_full;
u64 rx_xdp_tx_err;
u64 rx_xdp_tx_cqe;
@@ -97,6 +98,7 @@ struct mlx5e_sw_stats {
u64 tx_xdp_xmit;
u64 tx_xdp_mpwqe;
u64 tx_xdp_inlnw;
+ u64 tx_xdp_nops;
u64 tx_xdp_full;
u64 tx_xdp_err;
u64 tx_xdp_cqes;
@@ -288,6 +290,7 @@ struct mlx5e_xdpsq_stats {
u64 xmit;
u64 mpwqe;
u64 inlnw;
+ u64 nops;
u64 full;
u64 err;
/* dirtied @completion */
--
2.21.0
^ permalink raw reply related
* [net-next 01/12] net/mlx5: E-Switch, add ingress rate support
From: Saeed Mahameed @ 2019-08-01 19:56 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Eli Cohen, Paul Blakey, Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Eli Cohen <eli@mellanox.com>
Use the scheduling elements to implement an ingress rate limiter on an
eswitch port's ingress traffic. Since the ingress of an eswitch port is the
egress of VF port, we control eswitch ingress by controlling VF egress.
Configuration is done using the ports' representor net devices.
Please note that burst size configuration is not supported by
ConnectX-5 and earlier generation devices.
Configuration examples:
tc:
tc filter add dev enp59s0f0_0 root protocol ip matchall action police rate 1mbit burst 20k
ovs:
ovs-vsctl set interface eth0 ingress_policing_rate=1000
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/en_rep.c | 19 ++++
.../net/ethernet/mellanox/mlx5/core/en_rep.h | 1 +
.../net/ethernet/mellanox/mlx5/core/en_tc.c | 100 ++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/en_tc.h | 7 ++
.../net/ethernet/mellanox/mlx5/core/eswitch.c | 16 +++
.../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +
6 files changed, 145 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 6edf0aeb1e26..bf6f4835457e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1156,6 +1156,23 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
}
}
+static
+int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ switch (ma->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return mlx5e_tc_configure_matchall(priv, ma);
+ case TC_CLSMATCHALL_DESTROY:
+ return mlx5e_tc_delete_matchall(priv, ma);
+ case TC_CLSMATCHALL_STATS:
+ mlx5e_tc_stats_matchall(priv, ma);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
void *cb_priv)
{
@@ -1165,6 +1182,8 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
switch (type) {
case TC_SETUP_CLSFLOWER:
return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
+ case TC_SETUP_CLSMATCHALL:
+ return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 10fafd5fa17b..43eeebe9c8d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -88,6 +88,7 @@ struct mlx5e_rep_priv {
struct mlx5_flow_handle *vport_rx_rule;
struct list_head vport_sqs_list;
struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */
+ struct rtnl_link_stats64 prev_vf_vport_stats;
struct devlink_port dl_port;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f3ed028d5017..dc5fc3350b65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3638,6 +3638,106 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
return err;
}
+static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch *esw;
+ u16 vport_num;
+ u32 rate_mbps;
+ int err;
+
+ esw = priv->mdev->priv.eswitch;
+ /* rate is given in bytes/sec.
+ * First convert to bits/sec and then round to the nearest mbit/secs.
+ * mbit means million bits.
+ * Moreover, if rate is non zero we choose to configure to a minimum of
+ * 1 mbit/sec.
+ */
+ rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
+ vport_num = rpriv->rep->vport;
+
+ err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
+
+ return err;
+}
+
+static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ const struct flow_action_entry *act;
+ int err;
+ int i;
+
+ if (!flow_action_has_entries(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
+ return -EINVAL;
+ }
+
+ if (!flow_offload_has_one_action(flow_action)) {
+ NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
+ return -EOPNOTSUPP;
+ }
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_POLICE:
+ err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
+ if (err)
+ return err;
+
+ rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct netlink_ext_ack *extack = ma->common.extack;
+ int prio = TC_H_MAJ(ma->common.prio) >> 16;
+
+ if (prio != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
+ return -EINVAL;
+ }
+
+ return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
+}
+
+int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct netlink_ext_ack *extack = ma->common.extack;
+
+ return apply_police_params(priv, 0, extack);
+}
+
+void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct rtnl_link_stats64 cur_stats;
+ u64 dbytes;
+ u64 dpkts;
+
+ cur_stats = priv->stats.vf_vport;
+ dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
+ dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
+ rpriv->prev_vf_vport_stats = cur_stats;
+ flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
+}
+
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
struct mlx5e_priv *peer_priv)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 1cb66bf76997..20f045e96c92 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -63,6 +63,13 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct flow_cls_offload *f, unsigned long flags);
+int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *f);
+int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *f);
+void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
+ struct tc_cls_matchall_offload *ma);
+
struct mlx5e_encap_entry;
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index f4ace5f8e884..5fbebee7254d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1585,6 +1585,22 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw,
return 0;
}
+int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
+ u32 rate_mbps)
+{
+ u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
+
+ return mlx5_modify_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ ctx,
+ vport->qos.esw_tsar_ix,
+ MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
+}
+
static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
{
((u8 *)node_guid)[7] = mac[0];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 4a03fdadb47e..804912e38dee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -261,6 +261,8 @@ void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
+int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
+ u32 rate_mbps);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
--
2.21.0
^ permalink raw reply related
* [net-next 04/12] net/mlx5e: Tx, Strict the room needed for SQ edge NOPs
From: Saeed Mahameed @ 2019-08-01 19:56 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev@vger.kernel.org, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Tariq Toukan <tariqt@mellanox.com>
We use NOPs to populate the WQ fragment edge if the WQE does not fit
in frag, to avoid WQEs crossing a page boundary (or wrap-around the WQ).
The upper bound on the needed number of NOPs is one WQEBB less than
the largest possible WQE, for otherwise the WQE would certainly fit.
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index ddfe19adb3d9..7da22b413a48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -6,7 +6,7 @@
#include "en.h"
-#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
+#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
MLX5E_SQ_NOPS_ROOM)
--
2.21.0
^ permalink raw reply related
* [net-next 03/12] net/mlx5: Add flow counter pool
From: Saeed Mahameed @ 2019-08-01 19:56 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Gavi Teitz, Vlad Buslov, Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Gavi Teitz <gavi@mellanox.com>
Add a pool of flow counters, based on flow counter bulks, removing the
need to allocate a new counter via a costly FW command during the flow
creation process. The time it takes to acquire/release a flow counter
is cut from ~50 [us] to ~50 [ns].
The pool is part of the mlx5 driver instance, and provides flow
counters for aging flows. mlx5_fc_create() was modified to provide
counters for aging flows from the pool by default, and
mlx5_destroy_fc() was modified to release counters back to the pool
for later reuse. If bulk allocation is not supported or fails, and for
non-aging flows, the fallback behavior is to allocate and free
individual counters.
The pool is comprised of three lists of flow counter bulks, one of
fully used bulks, one of partially used bulks, and one of unused
bulks. Counters are provided from the partially used bulks first, to
help limit bulk fragmentation.
The pool maintains a threshold, and strives to maintain the amount of
available counters below it. The pool is increased in size when a
counter acquisition request is made and there are no available
counters, and it is decreased in size when the last counter in a bulk
is released and there are more available counters than the threshold.
All pool size changes are done in the context of the
acquiring/releasing process.
The value of the threshold is directly correlated to the amount of
used counters the pool is providing, while constrained by a hard
maximum, and is recalculated every time a bulk is allocated/freed.
This ensures that the pool only consumes large amounts of memory for
available counters if the pool is being used heavily. When fully
populated and at the hard maximum, the buffer of available counters
consumes ~40 [MB].
Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../ethernet/mellanox/mlx5/core/fs_counters.c | 231 ++++++++++++++++--
include/linux/mlx5/driver.h | 12 +
2 files changed, 218 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 3e734e62a6cd..51f1736c455d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -40,6 +40,8 @@
#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
/* Max number of counters to query in bulk read is 32K */
#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
+#define MLX5_FC_POOL_USED_BUFF_RATIO 10
struct mlx5_fc_cache {
u64 packets;
@@ -65,6 +67,11 @@ struct mlx5_fc {
struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
};
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev);
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool);
+static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool);
+static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc);
+
/* locking scheme:
*
* It is the responsibility of the user to prevent concurrent calls or bad
@@ -202,13 +209,22 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
}
}
-static void mlx5_free_fc(struct mlx5_core_dev *dev,
- struct mlx5_fc *counter)
+static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
{
mlx5_cmd_fc_free(dev, counter->id);
kfree(counter);
}
+static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ if (counter->bulk)
+ mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter);
+ else
+ mlx5_fc_free(dev, counter);
+}
+
static void mlx5_fc_stats_work(struct work_struct *work)
{
struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
@@ -232,7 +248,7 @@ static void mlx5_fc_stats_work(struct work_struct *work)
llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
mlx5_fc_stats_remove(dev, counter);
- mlx5_free_fc(dev, counter);
+ mlx5_fc_release(dev, counter);
}
if (time_before(now, fc_stats->next_query) ||
@@ -248,26 +264,56 @@ static void mlx5_fc_stats_work(struct work_struct *work)
fc_stats->next_query = now + fc_stats->sampling_interval;
}
-struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev)
{
- struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
struct mlx5_fc *counter;
int err;
counter = kzalloc(sizeof(*counter), GFP_KERNEL);
if (!counter)
return ERR_PTR(-ENOMEM);
- INIT_LIST_HEAD(&counter->list);
err = mlx5_cmd_fc_alloc(dev, &counter->id);
- if (err)
- goto err_out;
+ if (err) {
+ kfree(counter);
+ return ERR_PTR(err);
+ }
+
+ return counter;
+}
+
+static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct mlx5_fc *counter;
+
+ if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) {
+ counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool);
+ if (!IS_ERR(counter))
+ return counter;
+ }
+
+ return mlx5_fc_single_alloc(dev);
+}
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+ struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging);
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ int err;
+
+ if (IS_ERR(counter))
+ return counter;
+
+ INIT_LIST_HEAD(&counter->list);
+ counter->aging = aging;
if (aging) {
u32 id = counter->id;
counter->cache.lastuse = jiffies;
- counter->aging = true;
+ counter->lastbytes = counter->cache.bytes;
+ counter->lastpackets = counter->cache.packets;
idr_preload(GFP_KERNEL);
spin_lock(&fc_stats->counters_idr_lock);
@@ -288,10 +334,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
return counter;
err_out_alloc:
- mlx5_cmd_fc_free(dev, counter->id);
-err_out:
- kfree(counter);
-
+ mlx5_fc_release(dev, counter);
return ERR_PTR(err);
}
EXPORT_SYMBOL(mlx5_fc_create);
@@ -315,7 +358,7 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
return;
}
- mlx5_free_fc(dev, counter);
+ mlx5_fc_release(dev, counter);
}
EXPORT_SYMBOL(mlx5_fc_destroy);
@@ -344,6 +387,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
+ mlx5_fc_pool_init(&fc_stats->fc_pool, dev);
return 0;
err_wq_create:
@@ -358,6 +402,7 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
struct mlx5_fc *counter;
struct mlx5_fc *tmp;
+ mlx5_fc_pool_cleanup(&fc_stats->fc_pool);
cancel_delayed_work_sync(&dev->priv.fc_stats.work);
destroy_workqueue(dev->priv.fc_stats.wq);
dev->priv.fc_stats.wq = NULL;
@@ -368,10 +413,10 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
tmplist = llist_del_all(&fc_stats->addlist);
llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
- mlx5_free_fc(dev, counter);
+ mlx5_fc_release(dev, counter);
list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list)
- mlx5_free_fc(dev, counter);
+ mlx5_fc_release(dev, counter);
}
int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
@@ -417,14 +462,15 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
/* Flow counter bluks */
struct mlx5_fc_bulk {
+ struct list_head pool_list;
u32 base_id;
int bulk_len;
unsigned long *bitmask;
struct mlx5_fc fcs[0];
};
-static void
-mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, u32 id)
+static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
+ u32 id)
{
counter->bulk = bulk;
counter->id = id;
@@ -435,8 +481,7 @@ static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk)
return bitmap_weight(bulk->bitmask, bulk->bulk_len);
}
-static struct mlx5_fc_bulk __attribute__((unused))
-*mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
+static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
{
enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask;
struct mlx5_fc_bulk *bulk;
@@ -479,7 +524,7 @@ static struct mlx5_fc_bulk __attribute__((unused))
return ERR_PTR(err);
}
-static int __attribute__((unused))
+static int
mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
{
if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) {
@@ -494,8 +539,7 @@ mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
return 0;
}
-static struct mlx5_fc __attribute__((unused))
-*mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk)
+static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk)
{
int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len);
@@ -506,8 +550,7 @@ static struct mlx5_fc __attribute__((unused))
return &bulk->fcs[free_fc_index];
}
-static int __attribute__((unused))
-mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
+static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
{
int fc_index = fc->id - bulk->base_id;
@@ -517,3 +560,141 @@ mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
set_bit(fc_index, bulk->bitmask);
return 0;
}
+
+/* Flow counters pool API */
+
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev)
+{
+ fc_pool->dev = dev;
+ mutex_init(&fc_pool->pool_lock);
+ INIT_LIST_HEAD(&fc_pool->fully_used);
+ INIT_LIST_HEAD(&fc_pool->partially_used);
+ INIT_LIST_HEAD(&fc_pool->unused);
+ fc_pool->available_fcs = 0;
+ fc_pool->used_fcs = 0;
+ fc_pool->threshold = 0;
+}
+
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool)
+{
+ struct mlx5_core_dev *dev = fc_pool->dev;
+ struct mlx5_fc_bulk *bulk;
+ struct mlx5_fc_bulk *tmp;
+
+ list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list)
+ mlx5_fc_bulk_destroy(dev, bulk);
+ list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list)
+ mlx5_fc_bulk_destroy(dev, bulk);
+ list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list)
+ mlx5_fc_bulk_destroy(dev, bulk);
+}
+
+static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool)
+{
+ fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD,
+ fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO);
+}
+
+static struct mlx5_fc_bulk *
+mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool)
+{
+ struct mlx5_core_dev *dev = fc_pool->dev;
+ struct mlx5_fc_bulk *new_bulk;
+
+ new_bulk = mlx5_fc_bulk_create(dev);
+ if (!IS_ERR(new_bulk))
+ fc_pool->available_fcs += new_bulk->bulk_len;
+ mlx5_fc_pool_update_threshold(fc_pool);
+ return new_bulk;
+}
+
+static void
+mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk)
+{
+ struct mlx5_core_dev *dev = fc_pool->dev;
+
+ fc_pool->available_fcs -= bulk->bulk_len;
+ mlx5_fc_bulk_destroy(dev, bulk);
+ mlx5_fc_pool_update_threshold(fc_pool);
+}
+
+static struct mlx5_fc *
+mlx5_fc_pool_acquire_from_list(struct list_head *src_list,
+ struct list_head *next_list,
+ bool move_non_full_bulk)
+{
+ struct mlx5_fc_bulk *bulk;
+ struct mlx5_fc *fc;
+
+ if (list_empty(src_list))
+ return ERR_PTR(-ENODATA);
+
+ bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list);
+ fc = mlx5_fc_bulk_acquire_fc(bulk);
+ if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0)
+ list_move(&bulk->pool_list, next_list);
+ return fc;
+}
+
+static struct mlx5_fc *
+mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool)
+{
+ struct mlx5_fc_bulk *new_bulk;
+ struct mlx5_fc *fc;
+
+ mutex_lock(&fc_pool->pool_lock);
+
+ fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used,
+ &fc_pool->fully_used, false);
+ if (IS_ERR(fc))
+ fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused,
+ &fc_pool->partially_used,
+ true);
+ if (IS_ERR(fc)) {
+ new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool);
+ if (IS_ERR(new_bulk)) {
+ fc = ERR_CAST(new_bulk);
+ goto out;
+ }
+ fc = mlx5_fc_bulk_acquire_fc(new_bulk);
+ list_add(&new_bulk->pool_list, &fc_pool->partially_used);
+ }
+ fc_pool->available_fcs--;
+ fc_pool->used_fcs++;
+
+out:
+ mutex_unlock(&fc_pool->pool_lock);
+ return fc;
+}
+
+static void
+mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc)
+{
+ struct mlx5_core_dev *dev = fc_pool->dev;
+ struct mlx5_fc_bulk *bulk = fc->bulk;
+ int bulk_free_fcs_amount;
+
+ mutex_lock(&fc_pool->pool_lock);
+
+ if (mlx5_fc_bulk_release_fc(bulk, fc)) {
+ mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n");
+ goto unlock;
+ }
+
+ fc_pool->available_fcs++;
+ fc_pool->used_fcs--;
+
+ bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk);
+ if (bulk_free_fcs_amount == 1)
+ list_move_tail(&bulk->pool_list, &fc_pool->partially_used);
+ if (bulk_free_fcs_amount == bulk->bulk_len) {
+ list_del(&bulk->pool_list);
+ if (fc_pool->available_fcs > fc_pool->threshold)
+ mlx5_fc_pool_free_bulk(fc_pool, bulk);
+ else
+ list_add(&bulk->pool_list, &fc_pool->unused);
+ }
+
+unlock:
+ mutex_unlock(&fc_pool->pool_lock);
+}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 267b2bc0ca4a..d8f348ef9c33 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -477,6 +477,17 @@ struct mlx5_core_sriov {
u16 max_vfs;
};
+struct mlx5_fc_pool {
+ struct mlx5_core_dev *dev;
+ struct mutex pool_lock; /* protects pool lists */
+ struct list_head fully_used;
+ struct list_head partially_used;
+ struct list_head unused;
+ int available_fcs;
+ int used_fcs;
+ int threshold;
+};
+
struct mlx5_fc_stats {
spinlock_t counters_idr_lock; /* protects counters_idr */
struct idr counters_idr;
@@ -489,6 +500,7 @@ struct mlx5_fc_stats {
unsigned long next_query;
unsigned long sampling_interval; /* jiffies */
u32 *bulk_query_out;
+ struct mlx5_fc_pool fc_pool;
};
struct mlx5_events;
--
2.21.0
^ permalink raw reply related
* [net-next 02/12] net/mlx5: Add flow counter bulk infrastructure
From: Saeed Mahameed @ 2019-08-01 19:56 UTC (permalink / raw)
To: David S. Miller
Cc: netdev@vger.kernel.org, Gavi Teitz, Vlad Buslov, Saeed Mahameed
In-Reply-To: <20190801195620.26180-1-saeedm@mellanox.com>
From: Gavi Teitz <gavi@mellanox.com>
Add infrastructure to track bulks of flow counters, providing
the means to allocate and deallocate bulks, and to acquire and
release individual counters from the bulks.
Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../ethernet/mellanox/mlx5/core/fs_counters.c | 105 ++++++++++++++++++
1 file changed, 105 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 067a4b56498b..3e734e62a6cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -58,6 +58,7 @@ struct mlx5_fc {
u64 lastpackets;
u64 lastbytes;
+ struct mlx5_fc_bulk *bulk;
u32 id;
bool aging;
@@ -412,3 +413,107 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
fc_stats->sampling_interval = min_t(unsigned long, interval,
fc_stats->sampling_interval);
}
+
+/* Flow counter bluks */
+
+struct mlx5_fc_bulk {
+ u32 base_id;
+ int bulk_len;
+ unsigned long *bitmask;
+ struct mlx5_fc fcs[0];
+};
+
+static void
+mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, u32 id)
+{
+ counter->bulk = bulk;
+ counter->id = id;
+}
+
+static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk)
+{
+ return bitmap_weight(bulk->bitmask, bulk->bulk_len);
+}
+
+static struct mlx5_fc_bulk __attribute__((unused))
+*mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
+{
+ enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask;
+ struct mlx5_fc_bulk *bulk;
+ int err = -ENOMEM;
+ int bulk_len;
+ u32 base_id;
+ int i;
+
+ alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc);
+ bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1;
+
+ bulk = kzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc),
+ GFP_KERNEL);
+ if (!bulk)
+ goto err_alloc_bulk;
+
+ bulk->bitmask = kcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!bulk->bitmask)
+ goto err_alloc_bitmask;
+
+ err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id);
+ if (err)
+ goto err_mlx5_cmd_bulk_alloc;
+
+ bulk->base_id = base_id;
+ bulk->bulk_len = bulk_len;
+ for (i = 0; i < bulk_len; i++) {
+ mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i);
+ set_bit(i, bulk->bitmask);
+ }
+
+ return bulk;
+
+err_mlx5_cmd_bulk_alloc:
+ kfree(bulk->bitmask);
+err_alloc_bitmask:
+ kfree(bulk);
+err_alloc_bulk:
+ return ERR_PTR(err);
+}
+
+static int __attribute__((unused))
+mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
+{
+ if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) {
+ mlx5_core_err(dev, "Freeing bulk before all counters were released\n");
+ return -EBUSY;
+ }
+
+ mlx5_cmd_fc_free(dev, bulk->base_id);
+ kfree(bulk->bitmask);
+ kfree(bulk);
+
+ return 0;
+}
+
+static struct mlx5_fc __attribute__((unused))
+*mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk)
+{
+ int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len);
+
+ if (free_fc_index >= bulk->bulk_len)
+ return ERR_PTR(-ENOSPC);
+
+ clear_bit(free_fc_index, bulk->bitmask);
+ return &bulk->fcs[free_fc_index];
+}
+
+static int __attribute__((unused))
+mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
+{
+ int fc_index = fc->id - bulk->base_id;
+
+ if (test_bit(fc_index, bulk->bitmask))
+ return -EINVAL;
+
+ set_bit(fc_index, bulk->bitmask);
+ return 0;
+}
--
2.21.0
^ permalink raw reply related
* [pull request][net-next 00/12] Mellanox, mlx5 updates 2019-08-01
From: Saeed Mahameed @ 2019-08-01 19:56 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev@vger.kernel.org, Saeed Mahameed
Hi Dave,
This series provides misc updates to mlx5 driver.
For more information please see tag log below.
Please pull and let me know if there is any problem.
Please note that the series starts with a merge of mlx5-next branch,
to resolve and avoid dependency with rdma tree.
Thanks,
Saeed.
---
The following changes since commit 68e18626dfe971df3856872ee58f63c389dea2f5:
Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux (2019-08-01 12:33:14 -0700)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-updates-2019-08-01
for you to fetch changes up to 6830b468259b45e3b73070474b8cec9388aa8c11:
net/mlx5e: Allow dropping specific tunnel packets (2019-08-01 12:33:33 -0700)
----------------------------------------------------------------
mlx5-updates-2019-08-01
Misc updates for mlx5 netdev driver:
1) Ingress rate support for E-Switch vports from Eli.
2) Gavi introduces flow counters bulk allocation and pool,
To improve the performance of flow counter acquisition.
3) From Tariq, micro improvements for tx path
4) From Shay, small improvement for XDP TX MPWQE inline flow.
5) Aya provides some cleanups for tx devlink health reporters.
6) Saeed, refactor checksum handling into a single function.
7) Tonghao, allows dropping specific tunnel packets.
----------------------------------------------------------------
Aya Levin (3):
net/mlx5e: Fix mlx5e_tx_reporter_create return value
net/mlx5e: Set tx reporter only on successful creation
net/mlx5e: TX reporter cleanup
Eli Cohen (1):
net/mlx5: E-Switch, add ingress rate support
Gavi Teitz (2):
net/mlx5: Add flow counter bulk infrastructure
net/mlx5: Add flow counter pool
Saeed Mahameed (1):
net/mlx5e: Rx, checksum handling refactoring
Shay Agroskin (1):
net/mlx5e: XDP, Close TX MPWQE session when no room for inline packet left
Tariq Toukan (3):
net/mlx5e: Tx, Strict the room needed for SQ edge NOPs
net/mlx5e: XDP, Slight enhancement for WQE fetch function
net/mlx5e: Tx, Soften inline mode VLAN dependencies
Tonghao Zhang (1):
net/mlx5e: Allow dropping specific tunnel packets
drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +-
.../net/ethernet/mellanox/mlx5/core/en/reporter.h | 1 -
.../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 17 +-
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 24 +-
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 36 +--
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 66 ++++-
.../net/ethernet/mellanox/mlx5/core/en_common.c | 12 -
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 2 +-
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +-
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 19 ++
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 1 +
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 26 +-
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 6 +
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 3 +
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 103 ++++++-
drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 7 +
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 7 +-
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 16 ++
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +
.../net/ethernet/mellanox/mlx5/core/fs_counters.c | 318 +++++++++++++++++++--
drivers/net/ethernet/mellanox/mlx5/core/vport.c | 7 +-
include/linux/mlx5/driver.h | 12 +
22 files changed, 592 insertions(+), 103 deletions(-)
^ permalink raw reply
* Re: [PATCH v3 0/3] auth_gss: netns refcount leaks when use-gss-proxy==1
From: J. Bruce Fields @ 2019-08-01 19:53 UTC (permalink / raw)
To: Wenbin Zeng
Cc: davem, viro, jlayton, trond.myklebust, anna.schumaker, wenbinzeng,
dsahern, nicolas.dichtel, willy, edumazet, jakub.kicinski,
tyhicks, chuck.lever, neilb, linux-fsdevel, linux-kernel, netdev,
linux-nfs
In-Reply-To: <1560341370-24197-1-git-send-email-wenbinzeng@tencent.com>
I lost track, what happened to these patches?
--b.
On Wed, Jun 12, 2019 at 08:09:27PM +0800, Wenbin Zeng wrote:
> This patch series fixes an auth_gss bug that results in netns refcount
> leaks when use-gss-proxy is set to 1.
>
> The problem was found in privileged docker containers with gssproxy service
> enabled and /proc/net/rpc/use-gss-proxy set to 1, the corresponding
> struct net->count ends up at 2 after container gets killed, the consequence
> is that the struct net cannot be freed.
>
> It turns out that write_gssp() called gssp_rpc_create() to create a rpc
> client, this increases net->count by 2; rpcsec_gss_exit_net() is supposed
> to decrease net->count but it never gets called because its call-path is:
> net->count==0 -> cleanup_net -> ops_exit_list -> rpcsec_gss_exit_net
> Before rpcsec_gss_exit_net() gets called, net->count cannot reach 0, this
> is a deadlock situation.
>
> To fix the problem, we must break the deadlock, rpcsec_gss_exit_net()
> should move out of the put() path and find another chance to get called,
> I think nsfs_evict() is a good place to go, when netns inode gets evicted
> we call rpcsec_gss_exit_net() to free the rpc client, this requires a new
> callback i.e. evict to be added in struct proc_ns_operations, and add
> netns_evict() as one of netns_operations as well.
>
> v1->v2:
> * in nsfs_evict(), move ->evict() in front of ->put()
> v2->v3:
> * rpcsec_gss_evict_net() directly call gss_svc_shutdown_net() regardless
> if gssp_clnt is null, this is exactly same to what rpcsec_gss_exit_net()
> previously did
>
> Wenbin Zeng (3):
> nsfs: add evict callback into struct proc_ns_operations
> netns: add netns_evict into netns_operations
> auth_gss: fix deadlock that blocks rpcsec_gss_exit_net when
> use-gss-proxy==1
>
> fs/nsfs.c | 2 ++
> include/linux/proc_ns.h | 1 +
> include/net/net_namespace.h | 1 +
> net/core/net_namespace.c | 12 ++++++++++++
> net/sunrpc/auth_gss/auth_gss.c | 4 ++--
> 5 files changed, 18 insertions(+), 2 deletions(-)
>
> --
> 1.8.3.1
^ permalink raw reply
* Re: [PATCH net] ipv4/route: do not check saddr dev if iif is LOOPBACK_IFINDEX
From: David Ahern @ 2019-08-01 19:51 UTC (permalink / raw)
To: Hangbin Liu, netdev
Cc: Stefano Brivio, Marcelo Ricardo Leitner, David Ahern,
David S . Miller
In-Reply-To: <20190801082900.27216-1-liuhangbin@gmail.com>
On 8/1/19 2:29 AM, Hangbin Liu wrote:
> Jianlin reported a bug that for IPv4, ip route get from src_addr would fail
> if src_addr is not an address on local system.
>
> \# ip route get 1.1.1.1 from 2.2.2.2
> RTNETLINK answers: Invalid argument
so this is a forwarding lookup in which case iif should be set. Based on
the above 'route get' inet_rtm_getroute is doing a lookup as if it is
locally generated traffic.
^ permalink raw reply
* Re: [PATCH] net: bridge: Allow bridge to joing multicast groups
From: Horatiu Vultur @ 2019-08-01 19:48 UTC (permalink / raw)
To: Vivien Didelot
Cc: roopa, nikolay, davem, bridge, netdev, linux-kernel,
allan.nielsen
In-Reply-To: <20190801151739.GB32290@t480s.localdomain>
Hi Vivien,
The 08/01/2019 15:17, Vivien Didelot wrote:
> External E-Mail
>
> I'm a bit late in the conversation. Isn't this what you want?
>
> ip address add <multicast IPv4 address> dev br0 autojoin
>
Not really, I was looking for a way to register the ports to link-layer
multicast addresses. Sorry for the confusion; my description of the patch
was totally misleading.
If you follow this thread you will get a better idea of what we wanted to
achieve. We got some really good comments, and based on these we sent an
RFC [1].
>
> Thanks,
> Vivien
[1] https://patchwork.ozlabs.org/patch/1140468/
--
/Horatiu
^ permalink raw reply
* Re: [PATCH] net: bridge: Allow bridge to joing multicast groups
From: Vivien Didelot @ 2019-08-01 19:17 UTC (permalink / raw)
To: Horatiu Vultur
Cc: roopa, nikolay, davem, bridge, netdev, linux-kernel,
allan.nielsen, Horatiu Vultur
In-Reply-To: <1564055044-27593-1-git-send-email-horatiu.vultur@microchip.com>
Hi Horatiu,
On Thu, 25 Jul 2019 13:44:04 +0200, Horatiu Vultur <horatiu.vultur@microchip.com> wrote:
> There is no way to configure the bridge to receive only specific link
> layer multicast addresses. According to its description, the command
> 'bridge fdb append' is supposed to do that, but there was no way to notify
> the network driver that the bridge joined a group, because LLADDR was
> added to the unicast netdev_hw_addr_list.
>
> Therefore update fdb_add_entry to check if the NLM_F_APPEND flag is set
> and if the source is NULL, which represents the bridge itself. Then add
> the address to the multicast netdev_hw_addr_list of each bridge interface,
> after which the driver's .ndo_set_rx_mode function is called to notify
> the driver that the list of multicast MAC addresses changed.
>
> Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
> ---
> net/bridge/br_fdb.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 46 insertions(+), 3 deletions(-)
>
> diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
> index b1d3248..d93746d 100644
> --- a/net/bridge/br_fdb.c
> +++ b/net/bridge/br_fdb.c
> @@ -175,6 +175,29 @@ static void fdb_add_hw_addr(struct net_bridge *br, const unsigned char *addr)
> }
> }
>
> +static void fdb_add_hw_maddr(struct net_bridge *br, const unsigned char *addr)
> +{
> + int err;
> + struct net_bridge_port *p;
> +
> + ASSERT_RTNL();
> +
> + list_for_each_entry(p, &br->port_list, list) {
> + if (!br_promisc_port(p)) {
> + err = dev_mc_add(p->dev, addr);
> + if (err)
> + goto undo;
> + }
> + }
> +
> + return;
> +undo:
> + list_for_each_entry_continue_reverse(p, &br->port_list, list) {
> + if (!br_promisc_port(p))
> + dev_mc_del(p->dev, addr);
> + }
> +}
> +
> /* When a static FDB entry is deleted, the HW address from that entry is
> * also removed from the bridge private HW address list and updates all
> * the ports with needed information.
> @@ -192,13 +215,27 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
> }
> }
>
> +static void fdb_del_hw_maddr(struct net_bridge *br, const unsigned char *addr)
> +{
> + struct net_bridge_port *p;
> +
> + ASSERT_RTNL();
> +
> + list_for_each_entry(p, &br->port_list, list) {
> + if (!br_promisc_port(p))
> + dev_mc_del(p->dev, addr);
> + }
> +}
> +
> static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
> bool swdev_notify)
> {
> trace_fdb_delete(br, f);
>
> - if (f->is_static)
> + if (f->is_static) {
> fdb_del_hw_addr(br, f->key.addr.addr);
> + fdb_del_hw_maddr(br, f->key.addr.addr);
> + }
>
> hlist_del_init_rcu(&f->fdb_node);
> rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
> @@ -843,13 +880,19 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
> fdb->is_local = 1;
> if (!fdb->is_static) {
> fdb->is_static = 1;
> - fdb_add_hw_addr(br, addr);
> + if (flags & NLM_F_APPEND && !source)
> + fdb_add_hw_maddr(br, addr);
> + else
> + fdb_add_hw_addr(br, addr);
> }
> } else if (state & NUD_NOARP) {
> fdb->is_local = 0;
> if (!fdb->is_static) {
> fdb->is_static = 1;
> - fdb_add_hw_addr(br, addr);
> + if (flags & NLM_F_APPEND && !source)
> + fdb_add_hw_maddr(br, addr);
> + else
> + fdb_add_hw_addr(br, addr);
> }
> } else {
> fdb->is_local = 0;
> --
> 2.7.4
>
I'm a bit late in the conversation. Isn't this what you want?
ip address add <multicast IPv4 address> dev br0 autojoin
Thanks,
Vivien
^ permalink raw reply
* Re: [PATCH net-next] net/mlx5e: Allow dropping specific tunnel packets
From: Saeed Mahameed @ 2019-08-01 19:14 UTC (permalink / raw)
To: Roi Dayan, xiangxia.m.yue@gmail.com; +Cc: netdev@vger.kernel.org
In-Reply-To: <1564648859-17369-1-git-send-email-xiangxia.m.yue@gmail.com>
On Thu, 2019-08-01 at 16:40 +0800, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> In some cases, we don't want to allow specific tunnel packets
> to reach the host, so as to avoid high CPU usage (e.g. network attacks).
> But other tunnel packets which are not matched in hardware will be
> sent to the host too.
>
> $ tc filter add dev vxlan_sys_4789 \
> protocol ip chain 0 parent ffff: prio 1 handle 1 \
> flower dst_ip 1.1.1.100 ip_proto tcp dst_port 80 \
> enc_dst_ip 2.2.2.100 enc_key_id 100 enc_dst_port 4789 \
> action tunnel_key unset pipe action drop
>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Applied to net-next-mlx5.
Thanks!
^ permalink raw reply
* [PATCH v4 0/4] net: phy: realtek: Enable configuration of RTL8211E LEDs
From: Matthias Kaehlcke @ 2019-08-01 19:07 UTC (permalink / raw)
To: David S . Miller, Rob Herring, Mark Rutland, Andrew Lunn,
Florian Fainelli, Heiner Kallweit
Cc: netdev, devicetree, linux-kernel, Douglas Anderson,
Matthias Kaehlcke
The Realtek RTL8211E allows customization of the PHY LED behavior,
like which LEDs are on for certain link speeds and which LEDs blink
when there is traffic. By default EEE LED mode is enabled, in which
a blinking LED is on for 400ms and off for 2s. This series adds a
generic device tree binding for configuring PHY LEDs and adds LED
configuration support for the RTL8211E PHY.
Certain registers of the RTL8211E can only be accessed through
a vendor specific extended page mechanism. Extended pages need
to be accessed for the LED configuration. This series adds helpers
to facilitate accessing extended pages.
Matthias Kaehlcke (4):
dt-bindings: net: phy: Add subnode for LED configuration
net: phy: Add function to retrieve LED configuration from the DT
net: phy: realtek: Add helpers for accessing RTL8211E extension pages
net: phy: realtek: configure RTL8211E LEDs
.../devicetree/bindings/net/ethernet-phy.yaml | 47 +++++
drivers/net/phy/phy_device.c | 50 ++++++
drivers/net/phy/realtek.c | 169 ++++++++++++++++--
include/linux/phy.h | 15 ++
4 files changed, 266 insertions(+), 15 deletions(-)
--
2.22.0.770.g0f2c4a37fd-goog
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox