netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [net-next PATCH v2] net: dummy: Introduce dummy virtual functions
@ 2016-11-14 13:02 Phil Sutter
  2016-11-14 14:23 ` Eric Dumazet
  2016-11-17 22:04 ` Or Gerlitz
  0 siblings, 2 replies; 4+ messages in thread
From: Phil Sutter @ 2016-11-14 13:02 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sabrina Dubroca

The idea for this was born when testing VF support in iproute2 which was
impeded by hardware requirements. In fact, not every VF-capable hardware
driver implements all netdev ops, so testing the interface is still hard
to do even with a well-sorted hardware shelf.

To overcome this and allow for testing the user-kernel interface, this
patch allows to turn dummy into a PF with a configurable amount of VFs.

Due to the assumption that all PFs are PCI devices, this implementation
is not completely straightforward: In order to allow for
rtnl_fill_ifinfo() to see the dummy VFs, a fake PCI parent device is
attached to the dummy netdev. This has to happen at the right spot so
register_netdevice() does not get confused. This patch abuses
ndo_fix_features callback for that. In ndo_uninit callback, the fake
parent is removed again for the same purpose.

Joint work with Sabrina Dubroca.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
Changes since v1:
- Fixed issues reported by kbuild test robot:
  - pci_dev->sriov is only present if CONFIG_PCI_ATS is active.
  - pci_bus_type does not exist if CONFIG_PCI is not defined.
---
 drivers/net/dummy.c | 199 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 197 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 69fc8409a9733..a831537145bd9 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -34,6 +34,8 @@
 #include <linux/etherdevice.h>
 #include <linux/init.h>
 #include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include "../pci/pci.h"		/* for struct pci_sriov */
 #include <linux/rtnetlink.h>
 #include <net/rtnetlink.h>
 #include <linux/u64_stats_sync.h>
@@ -42,6 +44,37 @@
 #define DRV_VERSION	"1.0"
 
 static int numdummies = 1;
+static int num_vfs;
+
+static struct pci_sriov pdev_sriov;
+
+static struct pci_dev pci_pdev = {
+	.is_physfn = 1,
+#ifdef CONFIG_PCI_ATS
+	.sriov = &pdev_sriov,
+#endif
+#ifdef CONFIG_PCI
+	.dev.bus = &pci_bus_type,
+#endif
+};
+
+struct vf_data_storage {
+	unsigned char vf_mac[ETH_ALEN];
+	u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+	u16 pf_qos;
+	__be16 vlan_proto;
+	u16 min_tx_rate;
+	u16 max_tx_rate;
+	u8 spoofchk_enabled;
+	bool rss_query_enabled;
+	u8 trusted;
+	int link_state;
+};
+
+struct dummy_priv {
+	int num_vfs;
+	struct vf_data_storage *vfinfo;
+};
 
 /* fake multicast ability */
 static void set_multicast_list(struct net_device *dev)
@@ -91,15 +124,29 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static int dummy_dev_init(struct net_device *dev)
 {
+	struct dummy_priv *priv = netdev_priv(dev);
+
 	dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
 	if (!dev->dstats)
 		return -ENOMEM;
 
+	priv->num_vfs = num_vfs;
+	priv->vfinfo = NULL;
+
+	if (!num_vfs)
+		return 0;
+
+	priv->vfinfo = kcalloc(num_vfs, sizeof(struct vf_data_storage),
+			       GFP_KERNEL);
+	if (!priv->vfinfo)
+		return -ENOMEM;
+
 	return 0;
 }
 
 static void dummy_dev_uninit(struct net_device *dev)
 {
+	dev->dev.parent = NULL;
 	free_percpu(dev->dstats);
 }
 
@@ -112,6 +159,129 @@ static int dummy_change_carrier(struct net_device *dev, bool new_carrier)
 	return 0;
 }
 
+/* fake, just to set fake PCI parent after netdev_register_kobject() */
+static netdev_features_t dummy_fix_features(struct net_device *dev,
+					    netdev_features_t features)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	if (priv->num_vfs)
+		dev->dev.parent = &pci_pdev.dev;
+
+	return features;
+}
+
+static int dummy_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	if (!is_valid_ether_addr(mac) || (vf >= priv->num_vfs))
+		return -EINVAL;
+
+	memcpy(priv->vfinfo[vf].vf_mac, mac, ETH_ALEN);
+
+	return 0;
+}
+
+static int dummy_set_vf_vlan(struct net_device *dev, int vf,
+			     u16 vlan, u8 qos, __be16 vlan_proto)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	if ((vf >= priv->num_vfs) || (vlan > 4095) || (qos > 7))
+		return -EINVAL;
+
+	priv->vfinfo[vf].pf_vlan = vlan;
+	priv->vfinfo[vf].pf_qos = qos;
+	priv->vfinfo[vf].vlan_proto = vlan_proto;
+
+	return 0;
+}
+
+static int dummy_set_vf_rate(struct net_device *dev, int vf, int min, int max)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	if (vf >= priv->num_vfs)
+		return -EINVAL;
+
+	priv->vfinfo[vf].min_tx_rate = min;
+	priv->vfinfo[vf].max_tx_rate = max;
+
+	return 0;
+}
+
+static int dummy_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	if (vf >= priv->num_vfs)
+		return -EINVAL;
+
+	priv->vfinfo[vf].spoofchk_enabled = val;
+
+	return 0;
+}
+
+static int dummy_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	if (vf >= priv->num_vfs)
+		return -EINVAL;
+
+	priv->vfinfo[vf].rss_query_enabled = val;
+
+	return 0;
+}
+
+static int dummy_set_vf_trust(struct net_device *dev, int vf, bool val)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	if (vf >= priv->num_vfs)
+		return -EINVAL;
+
+	priv->vfinfo[vf].trusted = val;
+
+	return 0;
+}
+
+static int dummy_get_vf_config(struct net_device *dev,
+			       int vf, struct ifla_vf_info *ivi)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	if (vf >= priv->num_vfs)
+		return -EINVAL;
+
+	ivi->vf = vf;
+	memcpy(&ivi->mac, priv->vfinfo[vf].vf_mac, ETH_ALEN);
+	ivi->vlan = priv->vfinfo[vf].pf_vlan;
+	ivi->qos = priv->vfinfo[vf].pf_qos;
+	ivi->spoofchk = priv->vfinfo[vf].spoofchk_enabled;
+	ivi->linkstate = priv->vfinfo[vf].link_state;
+	ivi->min_tx_rate = priv->vfinfo[vf].min_tx_rate;
+	ivi->max_tx_rate = priv->vfinfo[vf].max_tx_rate;
+	ivi->rss_query_en = priv->vfinfo[vf].rss_query_enabled;
+	ivi->trusted = priv->vfinfo[vf].trusted;
+	ivi->vlan_proto = priv->vfinfo[vf].vlan_proto;
+
+	return 0;
+}
+
+static int dummy_set_vf_link_state(struct net_device *dev, int vf, int state)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	if (vf >= priv->num_vfs)
+		return -EINVAL;
+
+	priv->vfinfo[vf].link_state = state;
+
+	return 0;
+}
+
 static const struct net_device_ops dummy_netdev_ops = {
 	.ndo_init		= dummy_dev_init,
 	.ndo_uninit		= dummy_dev_uninit,
@@ -121,6 +291,15 @@ static const struct net_device_ops dummy_netdev_ops = {
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_get_stats64	= dummy_get_stats64,
 	.ndo_change_carrier	= dummy_change_carrier,
+	.ndo_fix_features	= dummy_fix_features,
+	.ndo_set_vf_mac		= dummy_set_vf_mac,
+	.ndo_set_vf_vlan	= dummy_set_vf_vlan,
+	.ndo_set_vf_rate	= dummy_set_vf_rate,
+	.ndo_set_vf_spoofchk	= dummy_set_vf_spoofchk,
+	.ndo_set_vf_trust	= dummy_set_vf_trust,
+	.ndo_get_vf_config	= dummy_get_vf_config,
+	.ndo_set_vf_link_state	= dummy_set_vf_link_state,
+	.ndo_set_vf_rss_query_en = dummy_set_vf_rss_query_en,
 };
 
 static void dummy_get_drvinfo(struct net_device *dev,
@@ -134,6 +313,14 @@ static const struct ethtool_ops dummy_ethtool_ops = {
 	.get_drvinfo            = dummy_get_drvinfo,
 };
 
+static void dummy_free_netdev(struct net_device *dev)
+{
+	struct dummy_priv *priv = netdev_priv(dev);
+
+	kfree(priv->vfinfo);
+	free_netdev(dev);
+}
+
 static void dummy_setup(struct net_device *dev)
 {
 	ether_setup(dev);
@@ -141,7 +328,7 @@ static void dummy_setup(struct net_device *dev)
 	/* Initialize the device structure. */
 	dev->netdev_ops = &dummy_netdev_ops;
 	dev->ethtool_ops = &dummy_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->destructor = dummy_free_netdev;
 
 	/* Fill in device structure with ethernet-generic values. */
 	dev->flags |= IFF_NOARP;
@@ -169,6 +356,7 @@ static int dummy_validate(struct nlattr *tb[], struct nlattr *data[])
 
 static struct rtnl_link_ops dummy_link_ops __read_mostly = {
 	.kind		= DRV_NAME,
+	.priv_size	= sizeof(struct dummy_priv),
 	.setup		= dummy_setup,
 	.validate	= dummy_validate,
 };
@@ -177,12 +365,16 @@ static struct rtnl_link_ops dummy_link_ops __read_mostly = {
 module_param(numdummies, int, 0);
 MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices");
 
+module_param(num_vfs, int, 0);
+MODULE_PARM_DESC(num_vfs, "Number of dummy VFs per dummy device");
+
 static int __init dummy_init_one(void)
 {
 	struct net_device *dev_dummy;
 	int err;
 
-	dev_dummy = alloc_netdev(0, "dummy%d", NET_NAME_UNKNOWN, dummy_setup);
+	dev_dummy = alloc_netdev(sizeof(struct dummy_priv),
+				 "dummy%d", NET_NAME_UNKNOWN, dummy_setup);
 	if (!dev_dummy)
 		return -ENOMEM;
 
@@ -190,6 +382,7 @@ static int __init dummy_init_one(void)
 	err = register_netdevice(dev_dummy);
 	if (err < 0)
 		goto err;
+
 	return 0;
 
 err:
@@ -201,6 +394,8 @@ static int __init dummy_init_module(void)
 {
 	int i, err = 0;
 
+	pdev_sriov.num_VFs = num_vfs;
+
 	rtnl_lock();
 	err = __rtnl_link_register(&dummy_link_ops);
 	if (err < 0)
-- 
2.10.0

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [net-next PATCH v2] net: dummy: Introduce dummy virtual functions
  2016-11-14 13:02 [net-next PATCH v2] net: dummy: Introduce dummy virtual functions Phil Sutter
@ 2016-11-14 14:23 ` Eric Dumazet
  2016-11-17 22:04 ` Or Gerlitz
  1 sibling, 0 replies; 4+ messages in thread
From: Eric Dumazet @ 2016-11-14 14:23 UTC (permalink / raw)
  To: Phil Sutter; +Cc: David Miller, netdev, Sabrina Dubroca

On Mon, 2016-11-14 at 14:02 +0100, Phil Sutter wrote:
> The idea for this was born when testing VF support in iproute2 which was
> impeded by hardware requirements. In fact, not every VF-capable hardware
> driver implements all netdev ops, so testing the interface is still hard
> to do even with a well-sorted hardware shelf.
> 
...
>  static int dummy_dev_init(struct net_device *dev)
>  {
> +	struct dummy_priv *priv = netdev_priv(dev);
> +
>  	dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
>  	if (!dev->dstats)
>  		return -ENOMEM;
>  
> +	priv->num_vfs = num_vfs;
> +	priv->vfinfo = NULL;
> +
> +	if (!num_vfs)
> +		return 0;
> +
> +	priv->vfinfo = kcalloc(num_vfs, sizeof(struct vf_data_storage),
> +			       GFP_KERNEL);
> +	if (!priv->vfinfo)
> +		return -ENOMEM;

You must free dev->dstats here, otherwise kernel memory will leak. 

> +
>  	return 0;
>  }
>  

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [net-next PATCH v2] net: dummy: Introduce dummy virtual functions
  2016-11-14 13:02 [net-next PATCH v2] net: dummy: Introduce dummy virtual functions Phil Sutter
  2016-11-14 14:23 ` Eric Dumazet
@ 2016-11-17 22:04 ` Or Gerlitz
  2016-11-18 15:59   ` Phil Sutter
  1 sibling, 1 reply; 4+ messages in thread
From: Or Gerlitz @ 2016-11-17 22:04 UTC (permalink / raw)
  To: Phil Sutter; +Cc: David Miller, Linux Netdev List, Sabrina Dubroca

On Mon, Nov 14, 2016 at 3:02 PM, Phil Sutter <phil@nwl.cc> wrote:

> Due to the assumption that all PFs are PCI devices, this implementation
> is not completely straightforward: In order to allow for
> rtnl_fill_ifinfo() to see the dummy VFs, a fake PCI parent device is
> attached to the dummy netdev. This has to happen at the right spot so
> register_netdevice() does not get confused. This patch abuses
> ndo_fix_features callback for that. In ndo_uninit callback, the fake
> parent is removed again for the same purpose.

So you did some mimic-ing of PCI interface, how do you let the user to
config the number of VFs? though a module param? why? if the module
param only serves to say how many VF the device supports, maybe
support the maximum possible by PCI spec and skip the module param?


> +module_param(num_vfs, int, 0);
> +MODULE_PARM_DESC(num_vfs, "Number of dummy VFs per dummy device");
> +

> @@ -190,6 +382,7 @@ static int __init dummy_init_one(void)
>         err = register_netdevice(dev_dummy);
>         if (err < 0)
>                 goto err;
> +
>         return 0;

nit, remove this added blank line..

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [net-next PATCH v2] net: dummy: Introduce dummy virtual functions
  2016-11-17 22:04 ` Or Gerlitz
@ 2016-11-18 15:59   ` Phil Sutter
  0 siblings, 0 replies; 4+ messages in thread
From: Phil Sutter @ 2016-11-18 15:59 UTC (permalink / raw)
  To: Or Gerlitz; +Cc: David Miller, Linux Netdev List, Sabrina Dubroca

Hi,

On Fri, Nov 18, 2016 at 12:04:14AM +0200, Or Gerlitz wrote:
> On Mon, Nov 14, 2016 at 3:02 PM, Phil Sutter <phil@nwl.cc> wrote:
> 
> > Due to the assumption that all PFs are PCI devices, this implementation
> > is not completely straightforward: In order to allow for
> > rtnl_fill_ifinfo() to see the dummy VFs, a fake PCI parent device is
> > attached to the dummy netdev. This has to happen at the right spot so
> > register_netdevice() does not get confused. This patch abuses
> > ndo_fix_features callback for that. In ndo_uninit callback, the fake
> > parent is removed again for the same purpose.
> 
> So you did some mimic-ing of PCI interface, how do you let the user to
> config the number of VFs? though a module param? why? if the module
> param only serves to say how many VF the device supports, maybe
> support the maximum possible by PCI spec and skip the module param?

Yes, this is controlled via module parameter. But it doesn't say how
much is supported but rather how many dummy VFs are to be created for
each dummy interface.

> > +module_param(num_vfs, int, 0);
> > +MODULE_PARM_DESC(num_vfs, "Number of dummy VFs per dummy device");
> > +
> 
> > @@ -190,6 +382,7 @@ static int __init dummy_init_one(void)
> >         err = register_netdevice(dev_dummy);
> >         if (err < 0)
> >                 goto err;
> > +
> >         return 0;
> 
> nit, remove this added blank line..

Oh yes, thanks. Spontaneous reviewer's blindness. :)

The implementation is problematic in another aspect though: Upon reboot,
it seems like no netdev ops are being called but the fake PCI parent's
kobject is being freed which does not work (and leads to an oops).

Cheers, Phil

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-11-18 15:59 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-11-14 13:02 [net-next PATCH v2] net: dummy: Introduce dummy virtual functions Phil Sutter
2016-11-14 14:23 ` Eric Dumazet
2016-11-17 22:04 ` Or Gerlitz
2016-11-18 15:59   ` Phil Sutter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).