* [net-next PATCH v4] net: dummy: Introduce dummy virtual functions
From: Phil Sutter @ 2016-11-23 16:25 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Sabrina Dubroca
The idea for this was born when testing VF support in iproute2 which was
impeded by hardware requirements. In fact, not every VF-capable hardware
driver implements all netdev ops, so testing the interface is still hard
to do even with a well-sorted hardware shelf.
To overcome this and allow for testing the user-kernel interface, this
patch allows turning dummy into a PF with a configurable number of VFs.
Due to the assumption that all PFs are PCI devices, this implementation
is not completely straightforward: In order to allow
rtnl_fill_ifinfo() to see the dummy VFs, a fake PCI parent device is
attached to the dummy netdev. This has to happen at the right spot so
register_netdevice() does not get confused. This patch abuses
ndo_fix_features callback for that. In ndo_uninit callback, the fake
parent is removed again for the same purpose.
Joint work with Sabrina Dubroca.
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
Changes since v3:
- Changed type of vf_mac field from unsigned char to u8.
- Column-aligned structs' field names.
Changes since v2:
- Fixed oops on reboot (need to initialize parent device mutex).
- Got rid of potential mem leak noticed by Eric Dumazet.
- Dropped stray newline insertion.
Changes since v1:
- Fixed issues reported by kbuild test robot:
- pci_dev->sriov is only present if CONFIG_PCI_ATS is active.
- pci_bus_type does not exist if CONFIG_PCI is not defined.
---
drivers/net/dummy.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 203 insertions(+), 2 deletions(-)
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 69fc8409a9733..91d4858ec563a 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -34,6 +34,8 @@
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include "../pci/pci.h" /* for struct pci_sriov */
#include <linux/rtnetlink.h>
#include <net/rtnetlink.h>
#include <linux/u64_stats_sync.h>
@@ -42,6 +44,37 @@
#define DRV_VERSION "1.0"
static int numdummies = 1;
+static int num_vfs;
+
+static struct pci_sriov pdev_sriov;
+
+static struct pci_dev pci_pdev = {
+ .is_physfn = 0,
+#ifdef CONFIG_PCI_ATS
+ .sriov = &pdev_sriov,
+#endif
+#ifdef CONFIG_PCI
+ .dev.bus = &pci_bus_type,
+#endif
+};
+
+struct vf_data_storage {
+ u8 vf_mac[ETH_ALEN];
+ u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+ u16 pf_qos;
+ __be16 vlan_proto;
+ u16 min_tx_rate;
+ u16 max_tx_rate;
+ u8 spoofchk_enabled;
+ bool rss_query_enabled;
+ u8 trusted;
+ int link_state;
+};
+
+struct dummy_priv {
+ int num_vfs;
+ struct vf_data_storage *vfinfo;
+};
/* fake multicast ability */
static void set_multicast_list(struct net_device *dev)
@@ -91,15 +124,31 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev)
static int dummy_dev_init(struct net_device *dev)
{
+ struct dummy_priv *priv = netdev_priv(dev);
+
dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
if (!dev->dstats)
return -ENOMEM;
+ priv->num_vfs = num_vfs;
+ priv->vfinfo = NULL;
+
+ if (!num_vfs)
+ return 0;
+
+ priv->vfinfo = kcalloc(num_vfs, sizeof(struct vf_data_storage),
+ GFP_KERNEL);
+ if (!priv->vfinfo) {
+ free_percpu(dev->dstats);
+ return -ENOMEM;
+ }
+
return 0;
}
static void dummy_dev_uninit(struct net_device *dev)
{
+ dev->dev.parent = NULL;
free_percpu(dev->dstats);
}
@@ -112,6 +161,134 @@ static int dummy_change_carrier(struct net_device *dev, bool new_carrier)
return 0;
}
+/* fake, just to set fake PCI parent after netdev_register_kobject() */
+static netdev_features_t dummy_fix_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ if (priv->num_vfs) {
+ dev->dev.parent = &pci_pdev.dev;
+ if (!pci_pdev.is_physfn) {
+ mutex_init(&pci_pdev.dev.mutex);
+ pci_pdev.is_physfn = 1;
+ }
+ }
+
+ return features;
+}
+
+static int dummy_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ if (!is_valid_ether_addr(mac) || (vf >= priv->num_vfs))
+ return -EINVAL;
+
+ memcpy(priv->vfinfo[vf].vf_mac, mac, ETH_ALEN);
+
+ return 0;
+}
+
+static int dummy_set_vf_vlan(struct net_device *dev, int vf,
+ u16 vlan, u8 qos, __be16 vlan_proto)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ if ((vf >= priv->num_vfs) || (vlan > 4095) || (qos > 7))
+ return -EINVAL;
+
+ priv->vfinfo[vf].pf_vlan = vlan;
+ priv->vfinfo[vf].pf_qos = qos;
+ priv->vfinfo[vf].vlan_proto = vlan_proto;
+
+ return 0;
+}
+
+static int dummy_set_vf_rate(struct net_device *dev, int vf, int min, int max)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ if (vf >= priv->num_vfs)
+ return -EINVAL;
+
+ priv->vfinfo[vf].min_tx_rate = min;
+ priv->vfinfo[vf].max_tx_rate = max;
+
+ return 0;
+}
+
+static int dummy_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ if (vf >= priv->num_vfs)
+ return -EINVAL;
+
+ priv->vfinfo[vf].spoofchk_enabled = val;
+
+ return 0;
+}
+
+static int dummy_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ if (vf >= priv->num_vfs)
+ return -EINVAL;
+
+ priv->vfinfo[vf].rss_query_enabled = val;
+
+ return 0;
+}
+
+static int dummy_set_vf_trust(struct net_device *dev, int vf, bool val)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ if (vf >= priv->num_vfs)
+ return -EINVAL;
+
+ priv->vfinfo[vf].trusted = val;
+
+ return 0;
+}
+
+static int dummy_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ if (vf >= priv->num_vfs)
+ return -EINVAL;
+
+ ivi->vf = vf;
+ memcpy(&ivi->mac, priv->vfinfo[vf].vf_mac, ETH_ALEN);
+ ivi->vlan = priv->vfinfo[vf].pf_vlan;
+ ivi->qos = priv->vfinfo[vf].pf_qos;
+ ivi->spoofchk = priv->vfinfo[vf].spoofchk_enabled;
+ ivi->linkstate = priv->vfinfo[vf].link_state;
+ ivi->min_tx_rate = priv->vfinfo[vf].min_tx_rate;
+ ivi->max_tx_rate = priv->vfinfo[vf].max_tx_rate;
+ ivi->rss_query_en = priv->vfinfo[vf].rss_query_enabled;
+ ivi->trusted = priv->vfinfo[vf].trusted;
+ ivi->vlan_proto = priv->vfinfo[vf].vlan_proto;
+
+ return 0;
+}
+
+static int dummy_set_vf_link_state(struct net_device *dev, int vf, int state)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ if (vf >= priv->num_vfs)
+ return -EINVAL;
+
+ priv->vfinfo[vf].link_state = state;
+
+ return 0;
+}
+
static const struct net_device_ops dummy_netdev_ops = {
.ndo_init = dummy_dev_init,
.ndo_uninit = dummy_dev_uninit,
@@ -121,6 +298,15 @@ static const struct net_device_ops dummy_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_get_stats64 = dummy_get_stats64,
.ndo_change_carrier = dummy_change_carrier,
+ .ndo_fix_features = dummy_fix_features,
+ .ndo_set_vf_mac = dummy_set_vf_mac,
+ .ndo_set_vf_vlan = dummy_set_vf_vlan,
+ .ndo_set_vf_rate = dummy_set_vf_rate,
+ .ndo_set_vf_spoofchk = dummy_set_vf_spoofchk,
+ .ndo_set_vf_trust = dummy_set_vf_trust,
+ .ndo_get_vf_config = dummy_get_vf_config,
+ .ndo_set_vf_link_state = dummy_set_vf_link_state,
+ .ndo_set_vf_rss_query_en = dummy_set_vf_rss_query_en,
};
static void dummy_get_drvinfo(struct net_device *dev,
@@ -134,6 +320,14 @@ static const struct ethtool_ops dummy_ethtool_ops = {
.get_drvinfo = dummy_get_drvinfo,
};
+static void dummy_free_netdev(struct net_device *dev)
+{
+ struct dummy_priv *priv = netdev_priv(dev);
+
+ kfree(priv->vfinfo);
+ free_netdev(dev);
+}
+
static void dummy_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -141,7 +335,7 @@ static void dummy_setup(struct net_device *dev)
/* Initialize the device structure. */
dev->netdev_ops = &dummy_netdev_ops;
dev->ethtool_ops = &dummy_ethtool_ops;
- dev->destructor = free_netdev;
+ dev->destructor = dummy_free_netdev;
/* Fill in device structure with ethernet-generic values. */
dev->flags |= IFF_NOARP;
@@ -169,6 +363,7 @@ static int dummy_validate(struct nlattr *tb[], struct nlattr *data[])
static struct rtnl_link_ops dummy_link_ops __read_mostly = {
.kind = DRV_NAME,
+ .priv_size = sizeof(struct dummy_priv),
.setup = dummy_setup,
.validate = dummy_validate,
};
@@ -177,12 +372,16 @@ static struct rtnl_link_ops dummy_link_ops __read_mostly = {
module_param(numdummies, int, 0);
MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices");
+module_param(num_vfs, int, 0);
+MODULE_PARM_DESC(num_vfs, "Number of dummy VFs per dummy device");
+
static int __init dummy_init_one(void)
{
struct net_device *dev_dummy;
int err;
- dev_dummy = alloc_netdev(0, "dummy%d", NET_NAME_UNKNOWN, dummy_setup);
+ dev_dummy = alloc_netdev(sizeof(struct dummy_priv),
+ "dummy%d", NET_NAME_UNKNOWN, dummy_setup);
if (!dev_dummy)
return -ENOMEM;
@@ -201,6 +400,8 @@ static int __init dummy_init_module(void)
{
int i, err = 0;
+ pdev_sriov.num_VFs = num_vfs;
+
rtnl_lock();
err = __rtnl_link_register(&dummy_link_ops);
if (err < 0)
--
2.10.0
^ permalink raw reply related
* Re: sendfile from 9p fs into af_alg
From: Alexei Starovoitov @ 2016-11-23 16:29 UTC (permalink / raw)
To: Al Viro; +Cc: linux-kernel, netdev, Daniel Borkmann, Martin KaFai Lau
In-Reply-To: <20161123155301.GP1555@ZenIV.linux.org.uk>
On Wed, Nov 23, 2016 at 03:53:01PM +0000, Al Viro wrote:
> On Wed, Nov 23, 2016 at 12:58:11AM -0800, Alexei Starovoitov wrote:
>
> > if I read it correctly 9p actually responded with 8192 bytes of requests...
> > whereas the file size was 9624.
> > For large file sizes (in megabytes) the difference between what
> > sendfile is reporting and actual file size can be 3x.
> > In the small file case (like above dump) it looks rounded to page size for some reason.
>
> OK, I think I see one bug in there; could you check if this gets it back to
> normal?
>
> diff --git a/fs/splice.c b/fs/splice.c
> index dcaf185..5a7750b 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -408,7 +408,8 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
> if (res <= 0)
> return -ENOMEM;
>
> - nr_pages = res / PAGE_SIZE;
> + BUG_ON(dummy);
> + nr_pages = DIV_ROUND_UP(res, PAGE_SIZE);
Hooray. It fixed it :)
Feel free to add my
Tested-by: Alexei Starovoitov <ast@kernel.org>
when you submit it.
Thanks for fixing it so quickly!
^ permalink raw reply
* [PATCH net-next] net: properly flush delay-freed skbs
From: Eric Dumazet @ 2016-11-23 16:44 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Jesper Dangaard Brouer, Alexander Duyck
From: Eric Dumazet <edumazet@google.com>
Typical NAPI drivers use napi_consume_skb(skb) at TX completion time.
This puts the skb in a special percpu queue, napi_alloc_cache, to get bulk
frees.
It turns out the queue is not flushed and hits the NAPI_SKB_CACHE_SIZE
limit quite often, with skbs that were queued hundreds of usec earlier.
I measured this can take ~6000 nsec to perform one flush.
__kfree_skb_flush() can be called from two points right now :
1) From net_tx_action(), but only for skbs that were queued to
sd->completion_queue.
-> Irrelevant for NAPI drivers in normal operation.
2) From net_rx_action(), but only under high stress or if RPS/RFS has a
pending action.
This patch changes net_rx_action() to perform the flush in all cases and
after more urgent operations happened (like kicking remote CPUS for
RPS/RFS).
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
---
net/core/dev.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index f71b34ab57a5132647729d20e21376d362d4e630..048b46b7c92ae10080226ea7050fad3529920baa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5260,7 +5260,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
if (list_empty(&list)) {
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
- return;
+ goto out;
break;
}
@@ -5278,7 +5278,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
}
}
- __kfree_skb_flush();
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
@@ -5288,6 +5287,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
+out:
+ __kfree_skb_flush();
}
struct netdev_adjacent {
^ permalink raw reply related
* Re: [PATCH] net: dsa: mv88e6xxx: egress all frames
From: Stefan Eichenberger @ 2016-11-23 16:50 UTC (permalink / raw)
To: Vivien Didelot; +Cc: Andrew Lunn, Stefan Eichenberger, f.fainelli, netdev
In-Reply-To: <87shqi89wu.fsf@ketchup.i-did-not-set--mail-host-address--so-tickle-me>
Hi Vivien
On Wed, Nov 23, 2016 at 10:59:13AM -0500, Vivien Didelot wrote:
> Hi Stefan,
>
> Stefan Eichenberger <stefan.eichenberger@netmodule.com> writes:
>
> >> Now, the different families are not 100% compatible with each
> >> other. We never had access to a 6097, so it has not been tested
> >> recently, and we have probably broken it... My guess would be,
> >> anywhere mv88e6xxx_6095_family(chip) is used, there also needs to be
> >> an mv88e6xxx_6097_family(chip). But i could be wrong.
> >
> > I think I probably found the problem. For EDSA type switches the bit
> > PORT_CONTROL_FORWARD_UNKNOWN_MC is set on the cpu port but not for DSA
> > type switches. Broadcast addresses are threaded as multicast addresses,
> > so unknown frames will never leave the switch.
>
> The Port Control Register (0x04) is one of these registers which changes
> almost completely among chip models.
>
> Are you able to give us the layout of the port register 0x04 on your
> 88E6097? I don't have access to its datasheet.
Yes sure, the layout of the Port Control Register for the 88E6097 is the same
as for the 88E6352:
15:14: SA Filtering: 00 -> SA filtering disabled
01 -> Drop on lock
10 -> Drop on Unlock
11 -> Drop to CPU
13:12: Egress Mode: 00 -> default unmodified mode
01 -> default to transmit all frames untagged
10 -> default to transmit all frames tagged
11 -> reserved for future use
11: Header: Ingress&Egress header mode (PORT_CONTROL_HEADER)
10: IGMP Snoop: IGMP/MLD Snooping (PORT_CONTROL_IGMP_MLD_SNOOP)
9:8 Frame Mode: 00 -> Normal Network
01 -> DSA (FRAME_MODE_DSA)
10 -> Provider (FRAME_MODE_PROVIDER)
11 -> Ether Type DSA (FRAME_ETHER_TYPE_DSA)
7: VLAN Tunnel: VLAN Tunnel (VLAN_TUNNEL)
6: TagIfBoth: Use tag info for QPri
5:4: InitialPri: 00 -> Use Port defaults for FPri and QPri
01 -> Use Tag Priority
10 -> Use IP Priority
11 -> Use Tag & IP Priority
3:2: Egress Floods: 00 -> Do not egress any frame with an unknown DA
01 -> Do not egress any frame with an unknown multicast DA
10 -> Do not egress any frame with an unknown unicast DA
11 -> Egress all frames with an unknown DA
Broadcasts are treated as multicast if FloodBC in
global2 register is not set.
1:0: PortState: 00 -> Disabled
01 -> Blocking/Listening
10 -> Learning
11 -> Forwarding
I hope this helps, feel free to ask for more infos.
>
> For instance on 88E6185 bit 3 is reserved, on 88E6352 and 88E6390 bit
> 3:2 are "Egress Floods" and 0x2 means "Do not egress any frame with an
> unknown unicast DA".
>
> > Do you know if there is a reason why this bit isn't set for DSA type
> > switches too? The patch would be extremely simple and it seems to work
> > perfectly with this bit set on the CPU port.
>
> All these family checks for bit masking are quite messy and ideally need
> proper abstraction...
>
> Can you give us the chunk of patch you are refering to?
I will send the patch in a few minutes.
Regards,
Stefan
^ permalink raw reply
* [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Stefan Eichenberger @ 2016-11-23 16:54 UTC (permalink / raw)
To: andrew, vivien.didelot; +Cc: f.fainelli, netdev, Stefan Eichenberger
In-Reply-To: <20161123165022.GD12698@gruene.netmodule.intranet>
Packets with unknown destination addresses are not forwarded to the cpu
port on mv88e6097 based switches (e.g. MV88E6097) at the moment. This
commit enables PORT_CONTROL_FORWARD_UNKNOWN_MC for this family.
Signed-off-by: Stefan Eichenberger <stefan.eichenberger@netmodule.com>
---
drivers/net/dsa/mv88e6xxx/chip.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index b14b3d5..4d21086 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2487,6 +2487,10 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
PORT_CONTROL_FORWARD_UNKNOWN_MC;
else
reg |= PORT_CONTROL_DSA_TAG;
+
+ if (mv88e6xxx_6097_family(chip))
+ reg |= PORT_CONTROL_FORWARD_UNKNOWN_MC;
+
reg |= PORT_CONTROL_EGRESS_ADD_TAG |
PORT_CONTROL_FORWARD_UNKNOWN;
}
--
2.9.3
^ permalink raw reply related
* Re: [PATCH] net: dsa: mv88e6xxx: egress all frames
From: Andrew Lunn @ 2016-11-23 16:58 UTC (permalink / raw)
To: Stefan Eichenberger
Cc: Vivien Didelot, Stefan Eichenberger, f.fainelli, netdev
In-Reply-To: <20161123165022.GD12698@gruene.netmodule.intranet>
> 9:8 Frame Mode: 00 -> Normal Network
> 01 -> DSA (FRAME_MODE_DSA)
> 10 -> Provider (FRAME_MODE_PROVIDER)
> 11 -> Ether Type DSA (FRAME_ETHER_TYPE_DSA)
Ah, there is one issue. This device supports EDSA. However,
MV88E6XXX_FLAGS_FAMILY_6097 does not list MV88E6XXX_FLAG_EDSA.
Without that flag set, the code assumes the device can only do DSA,
using the older definition of this register.
Andrew
^ permalink raw reply
* Re: [patch net-next v2 10/11] mlxsw: spectrum_router: Request a dump of FIB tables during init
From: Hannes Frederic Sowa @ 2016-11-23 16:59 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, idosch, eladr, yotamg, nogahf, arkadis, ogerlitz,
roopa, dsa, nikolay, andy, vivien.didelot, andrew, f.fainelli,
alexander.h.duyck, kaber
In-Reply-To: <20161123160453.GB1873@nanopsycho>
On Wed, Nov 23, 2016, at 17:04, Jiri Pirko wrote:
> Wed, Nov 23, 2016 at 05:00:00PM CET, hannes@stressinduktion.org wrote:
> >On Wed, Nov 23, 2016, at 15:48, Jiri Pirko wrote:
> >> From: Ido Schimmel <idosch@mellanox.com>
> >>
> >> Make sure the device has a complete view of the FIB tables by invoking
> >> their dump during module init.
> >>
> >> Signed-off-by: Ido Schimmel <idosch@mellanox.com>
> >> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> >> ---
> >> drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 16
> >> ++++++++++++++++
> >> 1 file changed, 16 insertions(+)
> >>
> >> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> >> b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> >> index 14bed1d..36a71d2 100644
> >> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> >> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> >> @@ -2027,6 +2027,21 @@ static int mlxsw_sp_router_fib_event(struct
> >> notifier_block *nb,
> >> return NOTIFY_DONE;
> >> }
> >>
> >> +static void mlxsw_sp_router_fib_dump(struct mlxsw_sp *mlxsw_sp)
> >> +{
> >> + while (!fib_notifier_dump(&mlxsw_sp->fib_nb)) {
> >> + /* Flush pending FIB notifications and then flush the
> >> + * device's table before requesting another dump. Do
> >> + * that with RTNL held, as FIB notification block is
> >> + * already registered.
> >> + */
> >> + mlxsw_core_flush_owq();
> >> + rtnl_lock();
> >> + mlxsw_sp_router_fib_flush(mlxsw_sp);
> >> + rtnl_unlock();
> >> + }
> >> +}
> >
> >I think it is fine to use this kind of synchronization.
> >
> >But I think that this part of the logic still belongs into the core
>
> Core does not know how driver handles the offloaded fibs. So only driver
> knows how/if he needs to do flush in case of retry.
Sure, but an abort function can be provided to the kernel anyway and the
driver can care about that.
> >kernel. I still think it could happen that we will loop here
> >indefinitely because of a lot of routing updates and as such would need
> >to abort this loop after a number of tries.
>
> In theory, it is possible, howevery quite unlikely.
I think the "quite unlikely" already got us down the path to not using
rtnl_lock in the first place.
As I said, I am not sure about this as I didn't try any hardware
offloading before and delays how long it needs to be transferred to
hardware, but having a fail case for that seems like a nice improvement.
At the same time I know of Linux boxes running in internet exchanges
having several peers. The high update rates actually led to bgp
implementation specifying flap damping which is actually nowadays
considered harmful.
Seriously, while most of the time convergence in routing protocols is
good and most updates only hit the BGP user space table anyway and the
change is suppressed because of recursive routing lookup idempotence, quite
unlikely events happen to the internet now and then:
http://research.dyn.com/2009/02/longer-is-not-better/, which caused *a
lot* of flapping and ongoing events on BGP routers throughout the world.
I agree it is unlikely that you have to refresh your hw dump during this
time, but who knows what customers do and what admins do in case
something like this happens. I just don't favor looping endlessly
trying to sync up and reach a stable state; I would rather tell the admin to
detach the control plane from the forwarding plane and sync up then.
That said, I think a sysctl for a maximum number of loops respected by
drivers that need to do so should be enough for the time being.
Bye,
Hannes
^ permalink raw reply
* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Andrew Lunn @ 2016-11-23 16:59 UTC (permalink / raw)
To: Stefan Eichenberger
Cc: vivien.didelot, f.fainelli, netdev, Stefan Eichenberger
In-Reply-To: <20161123165440.4894-1-stefan.eichenberger@netmodule.com>
On Wed, Nov 23, 2016 at 05:54:40PM +0100, Stefan Eichenberger wrote:
> Packets with unknown destination addresses are not forwarded to the cpu
> port on mv88e6097 based switches (e.g. MV88E6097) at the moment. This
> commit enables PORT_CONTROL_FORWARD_UNKNOWN_MC for this family.
Please try adding MV88E6XXX_FLAG_EDSA to
MV88E6XXX_FLAGS_FAMILY_6097. That is the better fix if it works.
Andrew
^ permalink raw reply
* Re: [patch net-next v2 10/11] mlxsw: spectrum_router: Request a dump of FIB tables during init
From: Jiri Pirko @ 2016-11-23 17:04 UTC (permalink / raw)
To: Hannes Frederic Sowa
Cc: netdev, davem, idosch, eladr, yotamg, nogahf, arkadis, ogerlitz,
roopa, dsa, nikolay, andy, vivien.didelot, andrew, f.fainelli,
alexander.h.duyck, kaber
In-Reply-To: <1479920345.4035504.797158425.2C10AA0C@webmail.messagingengine.com>
Wed, Nov 23, 2016 at 05:59:05PM CET, hannes@stressinduktion.org wrote:
>On Wed, Nov 23, 2016, at 17:04, Jiri Pirko wrote:
>> Wed, Nov 23, 2016 at 05:00:00PM CET, hannes@stressinduktion.org wrote:
>> >On Wed, Nov 23, 2016, at 15:48, Jiri Pirko wrote:
>> >> From: Ido Schimmel <idosch@mellanox.com>
>> >>
>> >> Make sure the device has a complete view of the FIB tables by invoking
>> >> their dump during module init.
>> >>
>> >> Signed-off-by: Ido Schimmel <idosch@mellanox.com>
>> >> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> >> ---
>> >> drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 16
>> >> ++++++++++++++++
>> >> 1 file changed, 16 insertions(+)
>> >>
>> >> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
>> >> b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
>> >> index 14bed1d..36a71d2 100644
>> >> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
>> >> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
>> >> @@ -2027,6 +2027,21 @@ static int mlxsw_sp_router_fib_event(struct
>> >> notifier_block *nb,
>> >> return NOTIFY_DONE;
>> >> }
>> >>
>> >> +static void mlxsw_sp_router_fib_dump(struct mlxsw_sp *mlxsw_sp)
>> >> +{
>> >> + while (!fib_notifier_dump(&mlxsw_sp->fib_nb)) {
>> >> + /* Flush pending FIB notifications and then flush the
>> >> + * device's table before requesting another dump. Do
>> >> + * that with RTNL held, as FIB notification block is
>> >> + * already registered.
>> >> + */
>> >> + mlxsw_core_flush_owq();
>> >> + rtnl_lock();
>> >> + mlxsw_sp_router_fib_flush(mlxsw_sp);
>> >> + rtnl_unlock();
>> >> + }
>> >> +}
>> >
>> >I think it is fine to use this kind of synchronization.
>> >
>> >But I think that this part of the logic still belongs into the core
>>
>> Core does not know how driver handles the offloaded fibs. So only driver
>> knows how/if he needs to do flush in case of retry.
>
>Sure, but an abort function can be provided to the kernel anyway and the
>driver can care about that.
Ok, how?
>
>> >kernel. I still think it could happen that we will loop here
>> >indefinitely because of a lot of routing updates and as such would need
>> >to abort this loop after a number of tries.
>>
>> In theory, it is possible, howevery quite unlikely.
>
>I think the "quite unlikely" already got us down the path to not using
>rtnl_lock in the first place.
>
>As I said, I am not sure about this as I didn't try any hardware
>offloading before and delays how long it needs to be transferred to
>hardware, but having a fail case for that seems like a nice improvement.
>At the same time I know of Linux boxes running in internet exchanges
>having several peers. The high update rates actually led to bgp
>implementation specifying flap damping which is actually nowadays
>considered harmful.
>
>Seriously, while most of the time convergence in routing protocols is
>good and most updates only hit the BGP user space table anyway and the
>change is suppressed because recursive routing lookup idempotence, quite
>unlikely events happen to the internet now and then:
>http://research.dyn.com/2009/02/longer-is-not-better/, which caused *a
>lot* of flapping and ongoing events on BGP routers throughout the world.
>
>I agree it is unlikely that you have to refresh your hw dump during this
>time, but who knows what customers do and what admins do in case
>something like this happens. I just don't favor to looping endlessly
>trying to sync up and getting into a stable state but tell the admin to
>detach the control plane from the forwarding plane and sync up then.
>
>That said, I think a sysctl for a maximum number of loops respected by
>drivers that needs to do so, should be enough for the time being.
Okay. Point taken.
^ permalink raw reply
* Re: [patch net-next v2 10/11] mlxsw: spectrum_router: Request a dump of FIB tables during init
From: Hannes Frederic Sowa @ 2016-11-23 17:08 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, idosch, eladr, yotamg, nogahf, arkadis, ogerlitz,
roopa, dsa, nikolay, andy, vivien.didelot, andrew, f.fainelli,
alexander.h.duyck, kaber
In-Reply-To: <20161123170436.GC1873@nanopsycho>
On Wed, Nov 23, 2016, at 18:04, Jiri Pirko wrote:
> >Sure, but an abort function can be provided to the kernel anyway and the
> >driver can care about that.
>
> Ok, how?
I think just a sysctl on top of this series is enough plus a pr_warn.
Rocker and mlxsw are responsible to loop for a maximum amount of time.
Otherwise, if something fancier is wanted, could we provide a
fib_inconsistency_notification function pointer in netdev_ops?
Bye and thanks,
Hannes
^ permalink raw reply
* [PATCH v3] net: dsa: mv88e6xxx: enable EDSA on mv88e6097
From: Stefan Eichenberger @ 2016-11-23 17:11 UTC (permalink / raw)
To: andrew, vivien.didelot; +Cc: f.fainelli, netdev, Stefan Eichenberger
In-Reply-To: <20161123165949.GB8760@lunn.ch>
EDSA is currently disabled on mv88e6097 devices; this commit enables it.
Signed-off-by: Stefan Eichenberger <stefan.eichenberger@netmodule.com>
---
drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index ab52c37..a2ff1fc 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -543,7 +543,8 @@ enum mv88e6xxx_cap {
MV88E6XXX_FLAGS_MULTI_CHIP)
#define MV88E6XXX_FLAGS_FAMILY_6097 \
- (MV88E6XXX_FLAG_G1_ATU_FID | \
+ (MV88E6XXX_FLAG_EDSA | \
+ MV88E6XXX_FLAG_G1_ATU_FID | \
MV88E6XXX_FLAG_G1_VTU_FID | \
MV88E6XXX_FLAG_GLOBAL2 | \
MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
--
2.9.3
^ permalink raw reply related
* Re: [PATCH net-next] net: properly flush delay-freed skbs
From: Alexander Duyck @ 2016-11-23 17:12 UTC (permalink / raw)
To: Eric Dumazet
Cc: David Miller, netdev, Jesper Dangaard Brouer, Alexander Duyck
In-Reply-To: <1479919496.8455.509.camel@edumazet-glaptop3.roam.corp.google.com>
On Wed, Nov 23, 2016 at 8:44 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> Typical NAPI drivers use napi_consume_skb(skb) at TX completion time.
> This puts skb in a percpu special queue, napi_alloc_cache, to get bulk
> frees.
>
> It turns out the queue is not flushed and hits the NAPI_SKB_CACHE_SIZE
> limit quite often, with skbs that were queued hundreds of usec earlier.
> I measured this can take ~6000 nsec to perform one flush.
>
> __kfree_skb_flush() can be called from two points right now :
>
> 1) From net_tx_action(), but only for skbs that were queued to
> sd->completion_queue.
>
> -> Irrelevant for NAPI drivers in normal operation.
>
> 2) From net_rx_action(), but only under high stress or if RPS/RFS has a
> pending action.
>
> This patch changes net_rx_action() to perform the flush in all cases and
> after more urgent operations happened (like kicking remote CPUS for
> RPS/RFS).
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> Cc: Alexander Duyck <alexander.h.duyck@intel.com>
> ---
Yeah, we didn't intend the data to be sitting around that long. The
change looks good to me.
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
^ permalink raw reply
* Re: [PATCH v3] net: dsa: mv88e6xxx: enable EDSA on mv88e6097
From: Andrew Lunn @ 2016-11-23 17:13 UTC (permalink / raw)
To: Stefan Eichenberger
Cc: vivien.didelot, f.fainelli, netdev, Stefan Eichenberger
In-Reply-To: <20161123171135.9768-1-stefan.eichenberger@netmodule.com>
On Wed, Nov 23, 2016 at 06:11:35PM +0100, Stefan Eichenberger wrote:
> EDSA is currently disabled on mv88e6097 devices, this commit enables it.
And was that sufficient to fix all your issues?
Andrew
^ permalink raw reply
* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Stefan Eichenberger @ 2016-11-23 17:14 UTC (permalink / raw)
To: Andrew Lunn; +Cc: Stefan Eichenberger, vivien.didelot, f.fainelli, netdev
In-Reply-To: <20161123165949.GB8760@lunn.ch>
On Wed, Nov 23, 2016 at 05:59:49PM +0100, Andrew Lunn wrote:
> On Wed, Nov 23, 2016 at 05:54:40PM +0100, Stefan Eichenberger wrote:
> > Packets with unknown destination addresses are not forwarded to the cpu
> > port on mv88e6097 based switches (e.g. MV88E6097) at the moment. This
> > commit enables PORT_CONTROL_FORWARD_UNKNOWN_MC for this family.
>
> Please try adding MV88E6XXX_FLAG_EDSA to
> MV88E6XXX_FLAGS_FAMILY_6097. That is the better fix if it works.
I was even wondering what EDSA means:) Thanks this solved the problem!
Regards
Stefan
^ permalink raw reply
* Re: [PATCH] drivers: net: davinci_mdio: use builtin_platform_driver
From: Grygorii Strashko @ 2016-11-23 17:22 UTC (permalink / raw)
To: Geliang Tang, Mugunthan V N; +Cc: linux-omap, netdev, linux-kernel
In-Reply-To: <055763562f90fd7e2d311308e1d731ba93c3eea9.1479912302.git.geliangtang@gmail.com>
On 11/23/2016 08:45 AM, Geliang Tang wrote:
> Use builtin_platform_driver() helper to simplify the code.
Not sure about this. We do support this driver to be a module.
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
> ---
> drivers/net/ethernet/ti/davinci_mdio.c | 6 +-----
> 1 file changed, 1 insertion(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
> index 33df340..b3f0a12 100644
> --- a/drivers/net/ethernet/ti/davinci_mdio.c
> +++ b/drivers/net/ethernet/ti/davinci_mdio.c
> @@ -536,11 +536,7 @@ static struct platform_driver davinci_mdio_driver = {
> .remove = davinci_mdio_remove,
> };
>
> -static int __init davinci_mdio_init(void)
> -{
> - return platform_driver_register(&davinci_mdio_driver);
> -}
> -device_initcall(davinci_mdio_init);
> +builtin_platform_driver(davinci_mdio_driver);
>
> static void __exit davinci_mdio_exit(void)
> {
>
--
regards,
-grygorii
^ permalink raw reply
* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Andrew Lunn @ 2016-11-23 17:32 UTC (permalink / raw)
To: Stefan Eichenberger
Cc: Stefan Eichenberger, vivien.didelot, f.fainelli, netdev
In-Reply-To: <20161123171441.GE12698@gruene.netmodule.intranet>
On Wed, Nov 23, 2016 at 06:14:41PM +0100, Stefan Eichenberger wrote:
> On Wed, Nov 23, 2016 at 05:59:49PM +0100, Andrew Lunn wrote:
> > On Wed, Nov 23, 2016 at 05:54:40PM +0100, Stefan Eichenberger wrote:
> > > Packets with unknown destination addresses are not forwarded to the cpu
> > > port on mv88e6097 based switches (e.g. MV88E6097) at the moment. This
> > > commit enables PORT_CONTROL_FORWARD_UNKNOWN_MC for this family.
> >
> > Please try adding MV88E6XXX_FLAG_EDSA to
> > MV88E6XXX_FLAGS_FAMILY_6097. That is the better fix if it works.
>
> I was even wondering what EDSA means:) Thanks this solved the problem!
Great.
We should fix up a few minor issues and resubmit.
What is the status of the first patch, which added 6097 to the driver?
I don't think David accepted it yet. So let's make one patchset
containing the two patches.
The subject line of the patches needs to have net-next in it. e.g.
[PATCH net-next 0/2] Add support for the MV88e6097
Include a cover note, saying what the patchset as a whole does.
This gets used as the merge commit message.
Then the two patches.
When posting the patchset, please start a new thread. A new version of
a patchset or patch should be a new thread.
Thanks
Andrew
^ permalink raw reply
* Re: [PATCH v2] cpsw: ethtool: add support for getting/setting EEE registers
From: Florian Fainelli @ 2016-11-23 17:33 UTC (permalink / raw)
To: yegorslists, netdev
Cc: linux-omap, grygorii.strashko, mugunthanvnm, roszenrami
In-Reply-To: <1479911913-1761-1-git-send-email-yegorslists@googlemail.com>
On 11/23/2016 06:38 AM, yegorslists@googlemail.com wrote:
> From: Yegor Yefremov <yegorslists@googlemail.com>
>
> Add the ability to query and set Energy Efficient Ethernet parameters
> via ethtool for applicable devices.
Are you sure this is enough to actually enable EEE? I don't see where
phy_init_eee() is called here, nor is the cpsw Ethernet controller part
configured to enable/disable EEE. EEE is not just a PHY thing, it
usually also needs to be configured properly at the Ethernet MAC/switch
level as well.
Just curious here.
--
Florian
^ permalink raw reply
* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Andrew Lunn @ 2016-11-23 17:40 UTC (permalink / raw)
To: Stefan Eichenberger
Cc: Stefan Eichenberger, vivien.didelot, f.fainelli, netdev
In-Reply-To: <20161123171441.GE12698@gruene.netmodule.intranet>
On Wed, Nov 23, 2016 at 06:14:41PM +0100, Stefan Eichenberger wrote:
> On Wed, Nov 23, 2016 at 05:59:49PM +0100, Andrew Lunn wrote:
> > On Wed, Nov 23, 2016 at 05:54:40PM +0100, Stefan Eichenberger wrote:
> > > Packets with unknown destination addresses are not forwarded to the cpu
> > > port on mv88e6097 based switches (e.g. MV88E6097) at the moment. This
> > > commit enables PORT_CONTROL_FORWARD_UNKNOWN_MC for this family.
> >
> > Please try adding MV88E6XXX_FLAG_EDSA to
> > MV88E6XXX_FLAGS_FAMILY_6097. That is the better fix if it works.
>
> I was even wondering what EDSA means:) Thanks this solved the problem!
Plain DSA puts four bytes of header between the MAC source address and
the EtherType/Length.
EDSA puts in an 8 byte header, and includes an Ethertype value of
0xdada. Having that ethertype value makes it more obvious what is
going on. And if you have a recent version of tcpdump, it will decode
the header.
Andrew
^ permalink raw reply
* [PATCH net-next] mlx4: do not use priv->stats_lock in mlx4_en_auto_moderation()
From: Eric Dumazet @ 2016-11-23 17:46 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Tariq Toukan
From: Eric Dumazet <edumazet@google.com>
Per RX ring packets/bytes counters are not protected by global
priv->stats_lock.
Better not confuse the reader, and use READ_ONCE() to show we read
these counters without surrounding synchronization.
Interrupt moderation is best effort, and we do not really care about
ultra precise counters.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 9a807e93c9fdd81e61e561208aa1480a244d0bdb..b964bdcd4ae509a7e693215e8b32f040218e252c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1391,10 +1391,8 @@ static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv)
return;
for (ring = 0; ring < priv->rx_ring_num; ring++) {
- spin_lock_bh(&priv->stats_lock);
- rx_packets = priv->rx_ring[ring]->packets;
- rx_bytes = priv->rx_ring[ring]->bytes;
- spin_unlock_bh(&priv->stats_lock);
+ rx_packets = READ_ONCE(priv->rx_ring[ring]->packets);
+ rx_bytes = READ_ONCE(priv->rx_ring[ring]->bytes);
rx_pkt_diff = ((unsigned long) (rx_packets -
priv->last_moder_packets[ring]));
^ permalink raw reply related
* Re: [patch net-next v2 09/11] ipv4: fib: Add an API to request a FIB dump
From: Hannes Frederic Sowa @ 2016-11-23 17:47 UTC (permalink / raw)
To: Jiri Pirko, netdev
Cc: davem, idosch, eladr, yotamg, nogahf, arkadis, ogerlitz, roopa,
dsa, nikolay, andy, vivien.didelot, andrew, f.fainelli,
alexander.h.duyck, kaber
In-Reply-To: <1479911670-4525-10-git-send-email-jiri@resnulli.us>
On 23.11.2016 15:34, Jiri Pirko wrote:
> From: Ido Schimmel <idosch@mellanox.com>
>
> Commit b90eb7549499 ("fib: introduce FIB notification infrastructure")
> introduced a new notification chain to notify listeners (f.e., switchdev
> drivers) about addition and deletion of routes.
>
> However, upon registration to the chain the FIB tables can already be
> populated, which means potential listeners will have an incomplete view
> of the tables.
>
> Solve that by adding an API to request a FIB dump. The dump itself is
> done using RCU in order not to starve consumers that need RTNL to make
> progress.
>
> For each net namespace the integrity of the dump is ensured by reading
> the atomic change sequence counter before and after the dump. This
> allows us to avoid the problematic situation in which the dumping
> process sends a ENTRY_ADD notification following ENTRY_DEL generated by
> another process holding RTNL.
>
> Signed-off-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> include/net/ip_fib.h | 1 +
> net/ipv4/fib_trie.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 118 insertions(+)
>
> diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> index 6c67b93..c76303e 100644
> --- a/include/net/ip_fib.h
> +++ b/include/net/ip_fib.h
> @@ -221,6 +221,7 @@ enum fib_event_type {
> FIB_EVENT_RULE_DEL,
> };
>
> +bool fib_notifier_dump(struct notifier_block *nb);
> int register_fib_notifier(struct notifier_block *nb);
> int unregister_fib_notifier(struct notifier_block *nb);
> int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
> index b1d2d09..9770edfe 100644
> --- a/net/ipv4/fib_trie.c
> +++ b/net/ipv4/fib_trie.c
> @@ -86,6 +86,67 @@
>
> static ATOMIC_NOTIFIER_HEAD(fib_chain);
>
> +static int call_fib_notifier(struct notifier_block *nb, struct net *net,
> + enum fib_event_type event_type,
> + struct fib_notifier_info *info)
> +{
> + info->net = net;
> + return nb->notifier_call(nb, event_type, info);
> +}
> +
> +static void fib_rules_notify(struct net *net, struct notifier_block *nb,
> + enum fib_event_type event_type)
> +{
> +#ifdef CONFIG_IP_MULTIPLE_TABLES
> + struct fib_notifier_info info;
> +
> + if (net->ipv4.fib_has_custom_rules)
> + call_fib_notifier(nb, net, event_type, &info);
> +#endif
> +}
> +
> +static void fib_notify(struct net *net, struct notifier_block *nb,
> + enum fib_event_type event_type);
> +
> +static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
> + enum fib_event_type event_type, u32 dst,
> + int dst_len, struct fib_info *fi,
> + u8 tos, u8 type, u32 tb_id, u32 nlflags)
> +{
> + struct fib_entry_notifier_info info = {
> + .dst = dst,
> + .dst_len = dst_len,
> + .fi = fi,
> + .tos = tos,
> + .type = type,
> + .tb_id = tb_id,
> + .nlflags = nlflags,
> + };
> + return call_fib_notifier(nb, net, event_type, &info.info);
> +}
> +
> +bool fib_notifier_dump(struct notifier_block *nb)
> +{
> + struct net *net;
> + bool ret = true;
> + rcu_read_lock();
> + for_each_net_rcu(net) {
> + int fib_seq = atomic_read(&net->ipv4.fib_seq);
> +
> + fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
> + fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
> + if (atomic_read(&net->ipv4.fib_seq) != fib_seq) {
> + ret = false;
> + goto out_unlock;
> + }
Hmm, I think you need to read the sequence counter under rtnl_lock to
have an ordering with the rest of the updates to the RCU trie. Otherwise
you don't know if the fib trie has the correct view with regard to the
incoming notifications as a whole. This is also necessary during restarts.
You can also try to register the notifier after the dump and check for
the sequence number after registering the notifier, maybe that is easier
(and restart unregisters and does the same).
Bye,
Hannes
^ permalink raw reply
* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Stefan Eichenberger @ 2016-11-23 17:49 UTC (permalink / raw)
To: Andrew Lunn; +Cc: Stefan Eichenberger, vivien.didelot, f.fainelli, netdev
In-Reply-To: <20161123173230.GD8760@lunn.ch>
On Wed, Nov 23, 2016 at 06:32:30PM +0100, Andrew Lunn wrote:
> On Wed, Nov 23, 2016 at 06:14:41PM +0100, Stefan Eichenberger wrote:
> > On Wed, Nov 23, 2016 at 05:59:49PM +0100, Andrew Lunn wrote:
> > > On Wed, Nov 23, 2016 at 05:54:40PM +0100, Stefan Eichenberger wrote:
> > > > Packets with unknown destination addresses are not forwarded to the cpu
> > > > port on mv88e6097 based switches (e.g. MV88E6097) at the moment. This
> > > > commit enables PORT_CONTROL_FORWARD_UNKNOWN_MC for this family.
> > >
> > > Please try adding MV88E6XXX_FLAG_EDSA to
> > > MV88E6XXX_FLAGS_FAMILY_6097. That is the better fix if it works.
> >
> > I was even wondering what EDSA means:) Thanks this solved the problem!
>
> Great.
>
> We should fix up a few minor issues and resubmit.
>
> What is the status of the first patch, which added 6097 to the driver?
> I don't think David accepted it yet. So lets make one patchset
> containing the two patches.
>
> The subject line of the patches need to have net-next in it. e.g.
>
> [PATCH net-next 0/2] Add support for the MV88e6097
>
> Include a cover node, saying what the patchset as a whole does.
> This gets used as the merge commit message.
>
> Then the two patches.
Perfect, thanks a lot for the help! The patchset will follow.
Thanks
Stefan
^ permalink raw reply
* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Vivien Didelot @ 2016-11-23 17:52 UTC (permalink / raw)
To: Andrew Lunn, Stefan Eichenberger; +Cc: Stefan Eichenberger, f.fainelli, netdev
In-Reply-To: <20161123174040.GE8760@lunn.ch>
Hi Andrew,
Andrew Lunn <andrew@lunn.ch> writes:
> And if you have a recent version of tcpdump, it will decode
> the header.
Since d729eb4, thanks to you Andrew ;-)
I move up the cleanup of ports setup in my priority list. The code is
quite cluttered at the moment and it's hard to read through it. We need
proper helpers for egress floods, (E)DSA setup, etc. like what is being
done for the other devices.
Thanks,
Vivien
^ permalink raw reply
* [PATCH net-next 0/2] Add support for the MV88e6097
From: Stefan Eichenberger @ 2016-11-23 17:55 UTC (permalink / raw)
To: andrew, vivien.didelot, davem; +Cc: netdev, Stefan Eichenberger
This patchset will add support for the MV88E6097 DSA switch and enable
EDSA on MV88E6097 family devices.
Stefan Eichenberger (2):
net: dsa: mv88e6xxx: add MV88E6097 switch
net: dsa: mv88e6xxx: enable EDSA on mv88e6097
drivers/net/dsa/mv88e6xxx/chip.c | 26 ++++++++++++++++++++++++++
drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 5 ++++-
2 files changed, 30 insertions(+), 1 deletion(-)
--
2.9.3
^ permalink raw reply
* [PATCH 1/2] net: dsa: mv88e6xxx: add MV88E6097 switch
From: Stefan Eichenberger @ 2016-11-23 17:55 UTC (permalink / raw)
To: andrew, vivien.didelot, davem; +Cc: netdev, Stefan Eichenberger
In-Reply-To: <20161123175546.31416-1-stefan.eichenberger@netmodule.com>
Add support for the MV88E6097 switch. The change was tested on an Armada
based platform with a MV88E6097 switch.
Signed-off-by: Stefan Eichenberger <stefan.eichenberger@netmodule.com>
---
drivers/net/dsa/mv88e6xxx/chip.c | 26 ++++++++++++++++++++++++++
drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 2 ++
2 files changed, 28 insertions(+)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index bada646..b14b3d5 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3209,6 +3209,19 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
.stats_get_stats = mv88e6095_stats_get_stats,
};
+static const struct mv88e6xxx_ops mv88e6097_ops = {
+ .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+ .phy_read = mv88e6xxx_g2_smi_phy_read,
+ .phy_write = mv88e6xxx_g2_smi_phy_write,
+ .port_set_link = mv88e6xxx_port_set_link,
+ .port_set_duplex = mv88e6xxx_port_set_duplex,
+ .port_set_speed = mv88e6185_port_set_speed,
+ .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+ .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+ .stats_get_strings = mv88e6095_stats_get_strings,
+ .stats_get_stats = mv88e6095_stats_get_stats,
+};
+
static const struct mv88e6xxx_ops mv88e6123_ops = {
/* MV88E6XXX_FAMILY_6165 */
.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
@@ -3580,6 +3593,19 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.ops = &mv88e6095_ops,
},
+ [MV88E6097] = {
+ .prod_num = PORT_SWITCH_ID_PROD_NUM_6097,
+ .family = MV88E6XXX_FAMILY_6097,
+ .name = "Marvell 88E6097/88E6097F",
+ .num_databases = 4096,
+ .num_ports = 11,
+ .port_base_addr = 0x10,
+ .global1_addr = 0x1b,
+ .age_time_coeff = 15000,
+ .flags = MV88E6XXX_FLAGS_FAMILY_6097,
+ .ops = &mv88e6097_ops,
+ },
+
[MV88E6123] = {
.prod_num = PORT_SWITCH_ID_PROD_NUM_6123,
.family = MV88E6XXX_FAMILY_6165,
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index 9298faa..ab52c37 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -81,6 +81,7 @@
#define PORT_SWITCH_ID 0x03
#define PORT_SWITCH_ID_PROD_NUM_6085 0x04a
#define PORT_SWITCH_ID_PROD_NUM_6095 0x095
+#define PORT_SWITCH_ID_PROD_NUM_6097 0x099
#define PORT_SWITCH_ID_PROD_NUM_6131 0x106
#define PORT_SWITCH_ID_PROD_NUM_6320 0x115
#define PORT_SWITCH_ID_PROD_NUM_6123 0x121
@@ -378,6 +379,7 @@
enum mv88e6xxx_model {
MV88E6085,
MV88E6095,
+ MV88E6097,
MV88E6123,
MV88E6131,
MV88E6161,
--
2.9.3
^ permalink raw reply related
* Re: [PATCH v2] net: dsa: mv88e6xxx: forward unknown mc packets on mv88e6097
From: Andrew Lunn @ 2016-11-23 18:01 UTC (permalink / raw)
To: Vivien Didelot; +Cc: Stefan Eichenberger, f.fainelli, netdev
In-Reply-To: <877f7uxevf.fsf@ketchup.i-did-not-set--mail-host-address--so-tickle-me>
On Wed, Nov 23, 2016 at 12:52:52PM -0500, Vivien Didelot wrote:
> Hi Andrew,
>
> Andrew Lunn <andrew@lunn.ch> writes:
>
> > And if you have a recent version of tcpdump, it will decode
> > the header.
>
> Since d729eb4, thanks to you Andrew ;-)
>
> I move up the cleanup of ports setup in my priority list.
Hi Vivien
Please take a look at my mv88e6390 branch. I already refactored this
code, because the mv88e6390 does something slightly different...
I hope to post another batch of mv88e6390 patches soon, and they will
include this cleanup. Since they will clash with these patches, i will
post them first as RFC.
Andrew
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox