netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sridhar Samudrala <sridhar.samudrala@intel.com>
To: mst@redhat.com, stephen@networkplumber.org, davem@davemloft.net,
	netdev@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	virtio-dev@lists.oasis-open.org, jesse.brandeburg@intel.com,
	alexander.h.duyck@intel.com, kubakici@wp.pl,
	sridhar.samudrala@intel.com, jasowang@redhat.com,
	loseweigh@gmail.com, jiri@resnulli.us, aaron.f.brown@intel.com,
	anjali.singhai@intel.com
Subject: [PATCH net-next v12 1/5] net: Introduce generic failover module
Date: Thu, 24 May 2018 09:55:13 -0700	[thread overview]
Message-ID: <1527180917-39737-2-git-send-email-sridhar.samudrala@intel.com> (raw)
In-Reply-To: <1527180917-39737-1-git-send-email-sridhar.samudrala@intel.com>

The failover module provides a generic interface for paravirtual drivers
to register a netdev and a set of ops with a failover instance. The ops
are used as event handlers that get called to handle netdev register/
unregister/link change/name change events on slave pci ethernet devices
with the same mac address as the failover netdev.

This enables paravirtual drivers to use a VF as an accelerated low latency
datapath. It also allows migration of VMs with direct attached VFs by
failing over to the paravirtual datapath when the VF is unplugged.

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 Documentation/networking/failover.rst |  18 ++
 MAINTAINERS                           |   8 +
 include/linux/netdevice.h             |  16 ++
 include/net/failover.h                |  36 ++++
 net/Kconfig                           |  13 ++
 net/core/Makefile                     |   1 +
 net/core/failover.c                   | 315 ++++++++++++++++++++++++++++++++++
 7 files changed, 407 insertions(+)
 create mode 100644 Documentation/networking/failover.rst
 create mode 100644 include/net/failover.h
 create mode 100644 net/core/failover.c

diff --git a/Documentation/networking/failover.rst b/Documentation/networking/failover.rst
new file mode 100644
index 000000000000..f0c8483cdbf5
--- /dev/null
+++ b/Documentation/networking/failover.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+FAILOVER
+========
+
+Overview
+========
+
+The failover module provides a generic interface for paravirtual drivers
+to register a netdev and a set of ops with a failover instance. The ops
+are used as event handlers that get called to handle netdev register/
+unregister/link change/name change events on slave pci ethernet devices
+with the same mac address as the failover netdev.
+
+This enables paravirtual drivers to use a VF as an accelerated low latency
+datapath. It also allows live migration of VMs with direct attached VFs by
+failing over to the paravirtual datapath when the VF is unplugged.
diff --git a/MAINTAINERS b/MAINTAINERS
index 032807a95558..ee0ce10b2a8d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5412,6 +5412,14 @@ S:	Maintained
 F:	Documentation/hwmon/f71805f
 F:	drivers/hwmon/f71805f.c
 
+FAILOVER MODULE
+M:	Sridhar Samudrala <sridhar.samudrala@intel.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	net/core/failover.c
+F:	include/net/failover.h
+F:	Documentation/networking/failover.rst
+
 FANOTIFY
 M:	Jan Kara <jack@suse.cz>
 R:	Amir Goldstein <amir73il@gmail.com>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 03ed492c4e14..0f4ba52b641d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1421,6 +1421,8 @@ struct net_device_ops {
  *	entity (i.e. the master device for bridged veth)
  * @IFF_MACSEC: device is a MACsec device
  * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
+ * @IFF_FAILOVER: device is a failover master device
+ * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
  */
 enum netdev_priv_flags {
 	IFF_802_1Q_VLAN			= 1<<0,
@@ -1450,6 +1452,8 @@ enum netdev_priv_flags {
 	IFF_PHONY_HEADROOM		= 1<<24,
 	IFF_MACSEC			= 1<<25,
 	IFF_NO_RX_HANDLER		= 1<<26,
+	IFF_FAILOVER			= 1<<27,
+	IFF_FAILOVER_SLAVE		= 1<<28,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1478,6 +1482,8 @@ enum netdev_priv_flags {
 #define IFF_RXFH_CONFIGURED		IFF_RXFH_CONFIGURED
 #define IFF_MACSEC			IFF_MACSEC
 #define IFF_NO_RX_HANDLER		IFF_NO_RX_HANDLER
+#define IFF_FAILOVER			IFF_FAILOVER
+#define IFF_FAILOVER_SLAVE		IFF_FAILOVER_SLAVE
 
 /**
  *	struct net_device - The DEVICE structure.
@@ -4321,6 +4327,16 @@ static inline bool netif_is_rxfh_configured(const struct net_device *dev)
 	return dev->priv_flags & IFF_RXFH_CONFIGURED;
 }
 
+static inline bool netif_is_failover(const struct net_device *dev)
+{
+	return dev->priv_flags & IFF_FAILOVER;
+}
+
+static inline bool netif_is_failover_slave(const struct net_device *dev)
+{
+	return dev->priv_flags & IFF_FAILOVER_SLAVE;
+}
+
 /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
 static inline void netif_keep_dst(struct net_device *dev)
 {
diff --git a/include/net/failover.h b/include/net/failover.h
new file mode 100644
index 000000000000..bb15438f39c7
--- /dev/null
+++ b/include/net/failover.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _FAILOVER_H
+#define _FAILOVER_H
+
+#include <linux/netdevice.h>
+
+struct failover_ops {
+	int (*slave_pre_register)(struct net_device *slave_dev,
+				  struct net_device *failover_dev);
+	int (*slave_register)(struct net_device *slave_dev,
+			      struct net_device *failover_dev);
+	int (*slave_pre_unregister)(struct net_device *slave_dev,
+				    struct net_device *failover_dev);
+	int (*slave_unregister)(struct net_device *slave_dev,
+				struct net_device *failover_dev);
+	int (*slave_link_change)(struct net_device *slave_dev,
+				 struct net_device *failover_dev);
+	int (*slave_name_change)(struct net_device *slave_dev,
+				 struct net_device *failover_dev);
+	rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb);
+};
+
+struct failover {
+	struct list_head list;
+	struct net_device __rcu *failover_dev;
+	struct failover_ops __rcu *ops;
+};
+
+struct failover *failover_register(struct net_device *dev,
+				   struct failover_ops *ops);
+void failover_unregister(struct failover *failover);
+int failover_slave_unregister(struct net_device *slave_dev);
+
+#endif /* _FAILOVER_H */
diff --git a/net/Kconfig b/net/Kconfig
index df8d45ef47d8..d581c4f0f1c4 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -430,6 +430,19 @@ config MAY_USE_DEVLINK
 config PAGE_POOL
        bool
 
+config FAILOVER
+	tristate "Generic failover module"
+	help
+	  The failover module provides a generic interface for paravirtual
+	  drivers to register a netdev and a set of ops with a failover
+	  instance. The ops are used as event handlers that get called to
+	  handle netdev register/unregister/link change/name change events
+	  on slave pci ethernet devices with the same mac address as the
+	  failover netdev. This enables paravirtual drivers to use a
+	  VF as an accelerated low latency datapath. It also allows live
+	  migration of VMs with direct attached VFs by failing over to the
+	  paravirtual datapath when the VF is unplugged.
+
 endif   # if NET
 
 # Used by archs to tell that they support BPF JIT compiler plus which flavour.
diff --git a/net/core/Makefile b/net/core/Makefile
index 7080417f8bc8..80175e6a2eb8 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -31,3 +31,4 @@ obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
 obj-$(CONFIG_GRO_CELLS) += gro_cells.o
+obj-$(CONFIG_FAILOVER) += failover.o
diff --git a/net/core/failover.c b/net/core/failover.c
new file mode 100644
index 000000000000..4a92a98ccce9
--- /dev/null
+++ b/net/core/failover.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* A common module to handle registrations and notifications for paravirtual
+ * drivers to enable accelerated datapath and support VF live migration.
+ *
+ * The notifier and event handling code is based on netvsc driver.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <uapi/linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <net/failover.h>
+
+static LIST_HEAD(failover_list);
+static DEFINE_SPINLOCK(failover_lock);
+
+static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops)
+{
+	struct net_device *failover_dev;
+	struct failover *failover;
+
+	spin_lock(&failover_lock);
+	list_for_each_entry(failover, &failover_list, list) {
+		failover_dev = rtnl_dereference(failover->failover_dev);
+		if (ether_addr_equal(failover_dev->perm_addr, mac)) {
+			*ops = rtnl_dereference(failover->ops);
+			spin_unlock(&failover_lock);
+			return failover_dev;
+		}
+	}
+	spin_unlock(&failover_lock);
+	return NULL;
+}
+
+/**
+ * failover_slave_register - Register a slave netdev
+ *
+ * @slave_dev: slave netdev that is being registered
+ *
+ * Registers a slave device to a failover instance. Only ethernet devices
+ * are supported.
+ */
+static int failover_slave_register(struct net_device *slave_dev)
+{
+	struct netdev_lag_upper_info lag_upper_info;
+	struct net_device *failover_dev;
+	struct failover_ops *fops;
+	int err;
+
+	if (slave_dev->type != ARPHRD_ETHER)
+		goto done;
+
+	ASSERT_RTNL();
+
+	failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+	if (!failover_dev)
+		goto done;
+
+	if (fops && fops->slave_pre_register &&
+	    fops->slave_pre_register(slave_dev, failover_dev))
+		goto done;
+
+	err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame,
+					 failover_dev);
+	if (err) {
+		netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n",
+			   err);
+		goto done;
+	}
+
+	lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
+	err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL,
+					   &lag_upper_info, NULL);
+	if (err) {
+		netdev_err(slave_dev, "can not set failover device %s (err = %d)\n",
+			   failover_dev->name, err);
+		goto err_upper_link;
+	}
+
+	slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+
+	if (fops && fops->slave_register &&
+	    !fops->slave_register(slave_dev, failover_dev))
+		return NOTIFY_OK;
+
+	netdev_upper_dev_unlink(slave_dev, failover_dev);
+	slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+err_upper_link:
+	netdev_rx_handler_unregister(slave_dev);
+done:
+	return NOTIFY_DONE;
+}
+
+/**
+ * failover_slave_unregister - Unregister a slave netdev
+ *
+ * @slave_dev: slave netdev that is being unregistered
+ *
+ * Unregisters a slave device from a failover instance.
+ */
+int failover_slave_unregister(struct net_device *slave_dev)
+{
+	struct net_device *failover_dev;
+	struct failover_ops *fops;
+
+	if (!netif_is_failover_slave(slave_dev))
+		goto done;
+
+	ASSERT_RTNL();
+
+	failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+	if (!failover_dev)
+		goto done;
+
+	if (fops && fops->slave_pre_unregister &&
+	    fops->slave_pre_unregister(slave_dev, failover_dev))
+		goto done;
+
+	netdev_rx_handler_unregister(slave_dev);
+	netdev_upper_dev_unlink(slave_dev, failover_dev);
+	slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+
+	if (fops && fops->slave_unregister &&
+	    !fops->slave_unregister(slave_dev, failover_dev))
+		return NOTIFY_OK;
+
+done:
+	return NOTIFY_DONE;
+}
+EXPORT_SYMBOL_GPL(failover_slave_unregister);
+
+static int failover_slave_link_change(struct net_device *slave_dev)
+{
+	struct net_device *failover_dev;
+	struct failover_ops *fops;
+
+	if (!netif_is_failover_slave(slave_dev))
+		goto done;
+
+	ASSERT_RTNL();
+
+	failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+	if (!failover_dev)
+		goto done;
+
+	if (!netif_running(failover_dev))
+		goto done;
+
+	if (fops && fops->slave_link_change &&
+	    !fops->slave_link_change(slave_dev, failover_dev))
+		return NOTIFY_OK;
+
+done:
+	return NOTIFY_DONE;
+}
+
+static int failover_slave_name_change(struct net_device *slave_dev)
+{
+	struct net_device *failover_dev;
+	struct failover_ops *fops;
+
+	if (!netif_is_failover_slave(slave_dev))
+		goto done;
+
+	ASSERT_RTNL();
+
+	failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+	if (!failover_dev)
+		goto done;
+
+	if (!netif_running(failover_dev))
+		goto done;
+
+	if (fops && fops->slave_name_change &&
+	    !fops->slave_name_change(slave_dev, failover_dev))
+		return NOTIFY_OK;
+
+done:
+	return NOTIFY_DONE;
+}
+
+static int
+failover_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+
+	/* Skip parent events */
+	if (netif_is_failover(event_dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		return failover_slave_register(event_dev);
+	case NETDEV_UNREGISTER:
+		return failover_slave_unregister(event_dev);
+	case NETDEV_UP:
+	case NETDEV_DOWN:
+	case NETDEV_CHANGE:
+		return failover_slave_link_change(event_dev);
+	case NETDEV_CHANGENAME:
+		return failover_slave_name_change(event_dev);
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+static struct notifier_block failover_notifier = {
+	.notifier_call = failover_event,
+};
+
+static void
+failover_existing_slave_register(struct net_device *failover_dev)
+{
+	struct net *net = dev_net(failover_dev);
+	struct net_device *dev;
+
+	rtnl_lock();
+	for_each_netdev(net, dev) {
+		if (netif_is_failover(dev))
+			continue;
+		if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr))
+			failover_slave_register(dev);
+	}
+	rtnl_unlock();
+}
+
+/**
+ * failover_register - Register a failover instance
+ *
+ * @dev: failover netdev
+ * @ops: failover ops
+ *
+ * Allocate and register a failover instance for a failover netdev. ops
+ * provides handlers for slave device register/unregister/link change/
+ * name change events.
+ *
+ * Return: pointer to failover instance
+ */
+struct failover *failover_register(struct net_device *dev,
+				   struct failover_ops *ops)
+{
+	struct failover *failover;
+
+	if (dev->type != ARPHRD_ETHER)
+		return ERR_PTR(-EINVAL);
+
+	failover = kzalloc(sizeof(*failover), GFP_KERNEL);
+	if (!failover)
+		return ERR_PTR(-ENOMEM);
+
+	rcu_assign_pointer(failover->ops, ops);
+	dev_hold(dev);
+	dev->priv_flags |= IFF_FAILOVER;
+	rcu_assign_pointer(failover->failover_dev, dev);
+
+	spin_lock(&failover_lock);
+	list_add_tail(&failover->list, &failover_list);
+	spin_unlock(&failover_lock);
+
+	netdev_info(dev, "failover master:%s registered\n", dev->name);
+
+	failover_existing_slave_register(dev);
+
+	return failover;
+}
+EXPORT_SYMBOL_GPL(failover_register);
+
+/**
+ * failover_unregister - Unregister a failover instance
+ *
+ * @failover: pointer to failover instance
+ *
+ * Unregisters and frees a failover instance.
+ */
+void failover_unregister(struct failover *failover)
+{
+	struct net_device *failover_dev;
+
+	failover_dev = rcu_dereference(failover->failover_dev);
+
+	netdev_info(failover_dev, "failover master:%s unregistered\n",
+		    failover_dev->name);
+
+	failover_dev->priv_flags &= ~IFF_FAILOVER;
+	dev_put(failover_dev);
+
+	spin_lock(&failover_lock);
+	list_del(&failover->list);
+	spin_unlock(&failover_lock);
+
+	kfree(failover);
+}
+EXPORT_SYMBOL_GPL(failover_unregister);
+
+static __init int
+failover_init(void)
+{
+	register_netdevice_notifier(&failover_notifier);
+
+	return 0;
+}
+module_init(failover_init);
+
+static __exit
+void failover_exit(void)
+{
+	unregister_netdevice_notifier(&failover_notifier);
+}
+module_exit(failover_exit);
+
+MODULE_DESCRIPTION("Generic failover infrastructure/interface");
+MODULE_LICENSE("GPL v2");
-- 
2.14.3

  reply	other threads:[~2018-05-24 16:55 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-24 16:55 [PATCH net-next v12 0/5] Enable virtio_net to act as a standby for a passthru device Sridhar Samudrala
2018-05-24 16:55 ` Sridhar Samudrala [this message]
2018-05-25 22:37   ` [PATCH net-next v12 1/5] net: Introduce generic failover module Stephen Hemminger
2018-05-25 23:04     ` Samudrala, Sridhar
2018-05-26  7:43     ` Jiri Pirko
2018-05-25 22:38   ` Stephen Hemminger
2018-05-25 23:06     ` Samudrala, Sridhar
2018-05-25 23:29       ` Stephen Hemminger
2018-05-31  2:52       ` Stephen Hemminger
2018-05-31  2:58         ` Samudrala, Sridhar
2018-05-24 16:55 ` [PATCH net-next v12 2/5] netvsc: refactor notifier/event handling code to use the failover framework Sridhar Samudrala
2018-05-25 22:34   ` Stephen Hemminger
2018-05-25 23:11     ` Samudrala, Sridhar
2018-05-25 23:28       ` Stephen Hemminger
2018-05-26  7:22         ` Samudrala, Sridhar
2018-05-26  7:51           ` Jiri Pirko
2018-05-26 19:22             ` Samudrala, Sridhar
2018-05-31  2:06   ` Stephen Hemminger
2018-05-31  3:03     ` Samudrala, Sridhar
2018-05-31 12:58       ` Stephen Hemminger
2018-05-31 17:34         ` Michael S. Tsirkin
2018-05-31 18:35     ` Michael S. Tsirkin
2018-05-31 20:41       ` Siwei Liu
2018-05-24 16:55 ` [PATCH net-next v12 3/5] net: Introduce net_failover driver Sridhar Samudrala
2018-05-24 16:55 ` [PATCH net-next v12 4/5] virtio_net: Introduce VIRTIO_NET_F_STANDBY feature bit Sridhar Samudrala
2018-05-24 16:55 ` [PATCH net-next v12 5/5] virtio_net: Extend virtio to use VF datapath when available Sridhar Samudrala
2018-05-25 17:19 ` [PATCH net-next v12 0/5] Enable virtio_net to act as a standby for a passthru device Michael S. Tsirkin
2018-05-29  3:00 ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1527180917-39737-2-git-send-email-sridhar.samudrala@intel.com \
    --to=sridhar.samudrala@intel.com \
    --cc=aaron.f.brown@intel.com \
    --cc=alexander.h.duyck@intel.com \
    --cc=anjali.singhai@intel.com \
    --cc=davem@davemloft.net \
    --cc=jasowang@redhat.com \
    --cc=jesse.brandeburg@intel.com \
    --cc=jiri@resnulli.us \
    --cc=kubakici@wp.pl \
    --cc=loseweigh@gmail.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=stephen@networkplumber.org \
    --cc=virtio-dev@lists.oasis-open.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).