[net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl)

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl)
@ 2010-04-28  4:42 Scott Feldman
  2010-04-28  4:42 ` [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics Scott Feldman
  2010-04-28 13:13 ` [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl) Arnd Bergmann
  0 siblings, 2 replies; 20+ messages in thread
From: Scott Feldman @ 2010-04-28  4:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd

From: Scott Feldman <scofeldm@cisco.com>

Add new netdev ops ndo_{set|get}_port_profile to allow setting of port-profile
on a netdev interface.  Extends RTM_SETLINK/RTM_GETLINK with new sub cmd called
IFLA_PORT_PROFILE (added to end of IFLA_cmd list).  The port-profile cmd
arguments are (as seen from modified iproute2 cmdline):

       ip link set DEVICE [ { up | down } ]
                          [ arp { on | off } ]
                          [ dynamic { on | off } ]
                          [ multicast { on | off } ]
                          ...
                          [ vf NUM [ mac LLADDR ]
                                   [ vlan VLANID [ qos VLAN-QOS ] ]
                                   [ rate TXRATE ] ] 
                          [ port_profile [ PORT-PROFILE
                                   [ mac LLADDR ]
                                   [ host_uuid HOST_UUID ]
                                   [ client_uuid CLIENT_UUID ]
                                   [ client_name CLIENT_NAME ] ] ]
       ip link show [ DEVICE ]


A port-profile is used to configure/enable the switch port backing the netdev
interface, not to configure the host-facing side of the netdev.  A port-
profile is an identifier known to the switch.  How port-profiles are installed
on the switch or how available port-profiles is made know to the host is
outside the scope of this patch.

The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg (more about that later)
communicates with the switch, and the switch port backing the host netdev
interface is configured/enabled based on the settings defined by the port-
profile.  What those settings comprise, and how those settings are managed is
again outside the scope of this patch, since this patch only deals with the
first step in the flow.

Since we're using netlink sockets, the receiver of the RTM_SETLINK msg can
be in kernel- or user-space.  For kernel-space recipient, rtnetlink.c, the
new ndo_set_port_profile netdev op is called to set the port-profile.
User-space recipients can decide how they propagate the msg to the switch.
There is also a RTM_GETLINK cmd to to return port-profile setting of an
interface and to also return the status of the last port-profile.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>
---
 include/linux/if_link.h   |   26 ++++++++++++++++++++++++++
 include/linux/netdevice.h |   10 ++++++++++
 net/core/rtnetlink.c      |   22 ++++++++++++++++++++++
 3 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index cfd420b..6f02398 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -116,6 +116,7 @@ enum {
 	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
 	IFLA_VFINFO,
 	IFLA_STATS64,
+	IFLA_PORT_PROFILE,
 	__IFLA_MAX
 };
 
@@ -259,4 +260,29 @@ struct ifla_vf_info {
 	__u32 qos;
 	__u32 tx_rate;
 };
+
+/* Port-profile managment section */
+
+#define IFLA_PORT_PROFILE_MAX	40
+#define IFLA_PP_HOST_UUID_MAX	40
+#define IFLA_PP_CLIENT_UUID_MAX	40
+#define IFLA_PP_CLIENT_NAME_MAX	40
+
+enum ifla_port_profile_status {
+	IFLA_PORT_PROFILE_STATUS_UNKNOWN,
+	IFLA_PORT_PROFILE_STATUS_SUCCESS,
+	IFLA_PORT_PROFILE_STATUS_ERROR,
+	IFLA_PORT_PROFILE_STATUS_INPROGRESS,
+};
+
+struct ifla_port_profile {
+	__u8 status;
+	__u8 port_profile[IFLA_PORT_PROFILE_MAX];
+	__u8 mac[32]; /* MAX_ADDR_LEN */
+	__u8 host_uuid[IFLA_PP_HOST_UUID_MAX];
+		/* e.g. "CEEFD3B1-9E11-11DE-BDFD-000BAB01C0FB" */
+	__u8 client_uuid[IFLA_PP_CLIENT_UUID_MAX];
+	__u8 client_name[IFLA_PP_CLIENT_NAME_MAX]; /* e.g. "vm0-eth1" */
+};
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c5ed5f..2962288 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -696,6 +696,12 @@ struct netdev_rx_queue {
  * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
+ *
+ *	Port-profile management functions.
+ * int (*ndo_set_port_profile)(struct net_device *dev,
+ *			       struct ifla_port_profile *ipp);
+ * int (*ndo_get_port_profile)(struct net_device *dev,
+ *			       struct ifla_port_profile *ipp);
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -744,6 +750,10 @@ struct net_device_ops {
 	int			(*ndo_get_vf_config)(struct net_device *dev,
 						     int vf,
 						     struct ifla_vf_info *ivf);
+	int			(*ndo_set_port_profile)(struct net_device *dev,
+					struct ifla_port_profile *ipp);
+	int			(*ndo_get_port_profile)(struct net_device *dev,
+					struct ifla_port_profile *ipp);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 78c8598..1d7e9a7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -758,6 +758,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi);
 		}
 	}
+
+	if (dev->netdev_ops->ndo_get_port_profile) {
+		struct ifla_port_profile ipp;
+
+		if (!dev->netdev_ops->ndo_get_port_profile(dev, &ipp))
+			NLA_PUT(skb, IFLA_PORT_PROFILE, sizeof(ipp), &ipp);
+	}
+
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
 			goto nla_put_failure;
@@ -824,6 +832,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 				    .len = sizeof(struct ifla_vf_vlan) },
 	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct ifla_vf_tx_rate) },
+	[IFLA_PORT_PROFILE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_port_profile)},
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1028,6 +1038,18 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	}
 	err = 0;
 
+	if (tb[IFLA_PORT_PROFILE]) {
+		struct ifla_port_profile *ipp;
+		ipp = nla_data(tb[IFLA_PORT_PROFILE]);
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_port_profile)
+			err = ops->ndo_set_port_profile(dev, ipp);
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+	err = 0;
+
 errout:
 	if (err < 0 && modified && net_ratelimit())
 		printk(KERN_WARNING "A link change request failed with "


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-28  4:42 [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl) Scott Feldman
@ 2010-04-28  4:42 ` Scott Feldman
  2010-04-28 13:32   ` Arnd Bergmann
  2010-04-28 13:13 ` [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl) Arnd Bergmann
  1 sibling, 1 reply; 20+ messages in thread
From: Scott Feldman @ 2010-04-28  4:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd

From: Scott Feldman <scofeldm@cisco.com>

Add enic ndo_{set|get}_port_profile op to support setting port-profile for
dynamic vnics.  Enic dynamic vnics are just like normal enic eth vnics except
dynamic vnics require an extra configuration step to assign a port-profile
identifier to the interface before the interface is useable. Once assigned,
link comes up on the interface and is ready for I/O.  The port-profile is
used to configure the network port assigned to the interface.  The network
port configuration includes VLAN membership, QoS policies, and port security
settings typical of a data center network.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>
---
 drivers/net/enic/Makefile    |    2 -
 drivers/net/enic/enic.h      |    3 +
 drivers/net/enic/enic_main.c |  163 +++++++++++++++++++++++++++++++++++++++---
 drivers/net/enic/vnic_dev.c  |   50 +++++++++++++
 drivers/net/enic/vnic_dev.h  |    3 +
 drivers/net/enic/vnic_vic.c  |   73 +++++++++++++++++++
 drivers/net/enic/vnic_vic.h  |   59 +++++++++++++++
 7 files changed, 338 insertions(+), 15 deletions(-)

diff --git a/drivers/net/enic/Makefile b/drivers/net/enic/Makefile
index 391c3bc..e7b6c31 100644
--- a/drivers/net/enic/Makefile
+++ b/drivers/net/enic/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_ENIC) := enic.o
 
 enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \
-	enic_res.o vnic_dev.o vnic_rq.o
+	enic_res.o vnic_dev.o vnic_rq.o vnic_vic.o
 
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h
index 5fa56f1..b54e9eb 100644
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -34,7 +34,7 @@
 
 #define DRV_NAME		"enic"
 #define DRV_DESCRIPTION		"Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION		"1.3.1.1"
+#define DRV_VERSION		"1.3.1.1-iov"
 #define DRV_COPYRIGHT		"Copyright 2008-2009 Cisco Systems, Inc"
 #define PFX			DRV_NAME ": "
 
@@ -93,6 +93,7 @@ struct enic {
 	unsigned int mc_count;
 	int csum_rx_enabled;
 	u32 port_mtu;
+	struct ifla_port_profile port_profile;
 	u32 rx_coalesce_usecs;
 	u32 tx_coalesce_usecs;
 
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 1232887..394771d 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -29,6 +29,7 @@
 #include <linux/etherdevice.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/if_link.h>
 #include <linux/ethtool.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -40,6 +41,7 @@
 #include "vnic_dev.h"
 #include "vnic_intr.h"
 #include "vnic_stats.h"
+#include "vnic_vic.h"
 #include "enic_res.h"
 #include "enic.h"
 
@@ -49,10 +51,12 @@
 #define ENIC_DESC_MAX_SPLITS		(MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1)
 
 #define PCI_DEVICE_ID_CISCO_VIC_ENET         0x0043  /* ethernet vnic */
+#define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN     0x0044  /* enet dynamic vnic */
 
 /* Supported devices */
 static DEFINE_PCI_DEVICE_TABLE(enic_id_table) = {
 	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
+	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) },
 	{ 0, }	/* end of table */
 };
 
@@ -113,6 +117,11 @@ static const struct enic_stat enic_rx_stats[] = {
 static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
 static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);
 
+static int enic_is_dynamic(struct enic *enic)
+{
+	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
+}
+
 static int enic_get_settings(struct net_device *netdev,
 	struct ethtool_cmd *ecmd)
 {
@@ -810,14 +819,24 @@ static void enic_reset_mcaddrs(struct enic *enic)
 
 static int enic_set_mac_addr(struct net_device *netdev, char *addr)
 {
-	if (!is_valid_ether_addr(addr))
-		return -EADDRNOTAVAIL;
+	struct enic *enic = netdev_priv(netdev);
 
-	memcpy(netdev->dev_addr, addr, netdev->addr_len);
+	if (enic_is_dynamic(enic)) {
+		random_ether_addr(netdev->dev_addr);
+	} else {
+		if (!is_valid_ether_addr(addr))
+			return -EADDRNOTAVAIL;
+		memcpy(netdev->dev_addr, addr, netdev->addr_len);
+	}
 
 	return 0;
 }
 
+static int enic_set_mac_address(struct net_device *netdev, void *p)
+{
+	return -EOPNOTSUPP;
+}
+
 /* netif_tx_lock held, BHs disabled */
 static void enic_set_multicast_list(struct net_device *netdev)
 {
@@ -922,6 +941,118 @@ static void enic_tx_timeout(struct net_device *netdev)
 	schedule_work(&enic->reset);
 }
 
+static int enic_vnic_dev_deinit(struct enic *enic)
+{
+	int err;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_deinit(enic->vdev);
+	spin_unlock(&enic->devcmd_lock);
+	return err;
+}
+
+static int enic_dev_init_prov(struct enic *enic, struct vic_provinfo *vp)
+{
+	int err;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_init_prov(enic->vdev, (u8 *)vp, vic_provinfo_size(vp));
+	spin_unlock(&enic->devcmd_lock);
+	return err;
+}
+
+static int enic_provinfo_add_tlv_str(struct vic_provinfo *vp, u16 type,
+	u16 max_length, char *str)
+{
+	if (!str)
+		return 0;
+
+	if (strlen(str) + 1 > max_length)
+		return 0;
+
+	return vic_provinfo_add_tlv(vp, type, strlen(str) + 1, str);
+}
+
+static int enic_set_port_profile(struct net_device *netdev,
+	struct ifla_port_profile *ipp)
+{
+	struct enic *enic = netdev_priv(netdev);
+	struct vic_provinfo *vp;
+	u8 oui[3] = VIC_PROVINFO_CISCO_OUI;
+	u8 *mac = ipp->mac;
+	int err;
+
+	memset(&enic->port_profile, 0, sizeof(enic->port_profile));
+
+	if (!enic_is_dynamic(enic))
+		return -EOPNOTSUPP;
+
+	enic_vnic_dev_deinit(enic);
+
+	if (strlen(ipp->port_profile) == 0)
+		return 0;
+
+	if (is_zero_ether_addr(mac))
+		mac = netdev->dev_addr;
+
+	if (!is_valid_ether_addr(mac))
+		return -EADDRNOTAVAIL;
+
+	vp = vic_provinfo_alloc(GFP_KERNEL, oui, VIC_PROVINFO_LINUX_TYPE);
+	if (!vp)
+		return -ENOMEM;
+
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR,
+		IFLA_PORT_PROFILE_MAX, ipp->port_profile);
+	vic_provinfo_add_tlv(vp, VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR,
+		ETH_ALEN, mac);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_HOST_UUID_STR,
+		IFLA_PP_HOST_UUID_MAX, ipp->host_uuid);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_UUID_STR,
+		IFLA_PP_CLIENT_UUID_MAX, ipp->client_uuid);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_NAME_STR,
+		IFLA_PP_CLIENT_NAME_MAX, ipp->client_name);
+
+	err = enic_dev_init_prov(enic, vp);
+	if (err)
+		goto err_out;
+
+	enic_set_multicast_list(netdev);
+
+	memcpy(&enic->port_profile, ipp, sizeof(enic->port_profile));
+
+err_out:
+	vic_provinfo_free(vp);
+
+	return err;
+}
+
+static int enic_get_port_profile(struct net_device *netdev,
+	struct ifla_port_profile *ipp)
+{
+	struct enic *enic = netdev_priv(netdev);
+	int done, err, error;
+
+	enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_UNKNOWN;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_init_done(enic->vdev, &done, &error);
+	spin_unlock(&enic->devcmd_lock);
+
+	if (err || error)
+		enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_ERROR;
+
+	if (!done)
+		enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_INPROGRESS;
+
+	if (!error)
+		enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_SUCCESS;
+
+	memcpy(ipp, &enic->port_profile, sizeof(enic->port_profile));
+
+	return 0;
+}
+
 static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
 {
 	struct enic *enic = vnic_dev_priv(rq->vdev);
@@ -1440,10 +1571,12 @@ static int enic_open(struct net_device *netdev)
 	for (i = 0; i < enic->rq_count; i++)
 		vnic_rq_enable(&enic->rq[i]);
 
-	spin_lock(&enic->devcmd_lock);
-	enic_add_station_addr(enic);
-	spin_unlock(&enic->devcmd_lock);
-	enic_set_multicast_list(netdev);
+	if (!enic_is_dynamic(enic)) {
+		spin_lock(&enic->devcmd_lock);
+		enic_add_station_addr(enic);
+		spin_unlock(&enic->devcmd_lock);
+		enic_set_multicast_list(netdev);
+	}
 
 	netif_wake_queue(netdev);
 	napi_enable(&enic->napi);
@@ -1780,13 +1913,15 @@ static const struct net_device_ops enic_netdev_ops = {
 	.ndo_start_xmit		= enic_hard_start_xmit,
 	.ndo_get_stats		= enic_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_set_multicast_list	= enic_set_multicast_list,
+	.ndo_set_mac_address	= enic_set_mac_address,
 	.ndo_change_mtu		= enic_change_mtu,
 	.ndo_vlan_rx_register	= enic_vlan_rx_register,
 	.ndo_vlan_rx_add_vid	= enic_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= enic_vlan_rx_kill_vid,
 	.ndo_tx_timeout		= enic_tx_timeout,
+	.ndo_set_port_profile	= enic_set_port_profile,
+	.ndo_get_port_profile	= enic_get_port_profile,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= enic_poll_controller,
 #endif
@@ -2010,11 +2145,13 @@ static int __devinit enic_probe(struct pci_dev *pdev,
 
 	netif_carrier_off(netdev);
 
-	err = vnic_dev_init(enic->vdev, 0);
-	if (err) {
-		printk(KERN_ERR PFX
-			"vNIC dev init failed, aborting.\n");
-		goto err_out_dev_close;
+	if (!enic_is_dynamic(enic)) {
+		err = vnic_dev_init(enic->vdev, 0);
+		if (err) {
+			printk(KERN_ERR PFX
+				"vNIC dev init failed, aborting.\n");
+			goto err_out_dev_close;
+		}
 	}
 
 	err = enic_dev_init(enic);
diff --git a/drivers/net/enic/vnic_dev.c b/drivers/net/enic/vnic_dev.c
index d43a9d4..e351b0f 100644
--- a/drivers/net/enic/vnic_dev.c
+++ b/drivers/net/enic/vnic_dev.c
@@ -682,6 +682,56 @@ int vnic_dev_init(struct vnic_dev *vdev, int arg)
 	return r;
 }
 
+int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	int ret;
+
+	*done = 0;
+
+	ret = vnic_dev_cmd(vdev, CMD_INIT_STATUS, &a0, &a1, wait);
+	if (ret)
+		return ret;
+
+	*done = (a0 == 0);
+
+	*err = (a0 == 0) ? a1 : 0;
+
+	return 0;
+}
+
+int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len)
+{
+	u64 a0, a1 = len;
+	int wait = 1000;
+	u64 prov_pa;
+	void *prov_buf;
+	int ret;
+
+	prov_buf = pci_alloc_consistent(vdev->pdev, len, &prov_pa);
+	if (!prov_buf)
+		return -ENOMEM;
+
+	memcpy(prov_buf, buf, len);
+
+	a0 = prov_pa;
+
+	ret = vnic_dev_cmd(vdev, CMD_INIT_PROV_INFO, &a0, &a1, wait);
+
+	pci_free_consistent(vdev->pdev, len, prov_buf, prov_pa);
+
+	return ret;
+}
+
+int vnic_dev_deinit(struct vnic_dev *vdev)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+
+	return vnic_dev_cmd(vdev, CMD_DEINIT, &a0, &a1, wait);
+}
+
 int vnic_dev_link_status(struct vnic_dev *vdev)
 {
 	if (vdev->linkstatus)
diff --git a/drivers/net/enic/vnic_dev.h b/drivers/net/enic/vnic_dev.h
index f5be640..27f5a5a 100644
--- a/drivers/net/enic/vnic_dev.h
+++ b/drivers/net/enic/vnic_dev.h
@@ -124,6 +124,9 @@ int vnic_dev_disable(struct vnic_dev *vdev);
 int vnic_dev_open(struct vnic_dev *vdev, int arg);
 int vnic_dev_open_done(struct vnic_dev *vdev, int *done);
 int vnic_dev_init(struct vnic_dev *vdev, int arg);
+int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err);
+int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len);
+int vnic_dev_deinit(struct vnic_dev *vdev);
 int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg);
 int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done);
 void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
diff --git a/drivers/net/enic/vnic_vic.c b/drivers/net/enic/vnic_vic.c
new file mode 100644
index 0000000..d769772
--- /dev/null
+++ b/drivers/net/enic/vnic_vic.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "vnic_vic.h"
+
+struct vic_provinfo *vic_provinfo_alloc(gfp_t flags, u8 *oui, u8 type)
+{
+	struct vic_provinfo *vp = kzalloc(VIC_PROVINFO_MAX_DATA, flags);
+
+	if (!vp || !oui)
+		return NULL;
+
+	memcpy(vp->oui, oui, sizeof(vp->oui));
+	vp->type = type;
+	vp->length = htonl(sizeof(vp->num_tlvs));
+
+	return vp;
+}
+
+void vic_provinfo_free(struct vic_provinfo *vp)
+{
+	kfree(vp);
+}
+
+int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
+	void *value)
+{
+	struct vic_provinfo_tlv *tlv;
+
+	if (!vp || !value)
+		return -EINVAL;
+
+	if (ntohl(vp->length) + sizeof(*tlv) + length >
+		VIC_PROVINFO_MAX_TLV_DATA)
+		return -ENOMEM;
+
+	tlv = (struct vic_provinfo_tlv *)((u8 *)vp->tlv +
+		ntohl(vp->length) - sizeof(vp->num_tlvs));
+
+	tlv->type = htons(type);
+	tlv->length = htons(length);
+	memcpy(tlv->value, value, length);
+
+	vp->num_tlvs = htonl(ntohl(vp->num_tlvs) + 1);
+	vp->length = htonl(ntohl(vp->length) + sizeof(*tlv) + length);
+
+	return 0;
+}
+
+size_t vic_provinfo_size(struct vic_provinfo *vp)
+{
+	return vp ?  ntohl(vp->length) + sizeof(*vp) - sizeof(vp->num_tlvs) : 0;
+}
diff --git a/drivers/net/enic/vnic_vic.h b/drivers/net/enic/vnic_vic.h
new file mode 100644
index 0000000..085c2a2
--- /dev/null
+++ b/drivers/net/enic/vnic_vic.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _VNIC_VIC_H_
+#define _VNIC_VIC_H_
+
+/* Note: All integer fields in NETWORK byte order */
+
+/* Note: String field lengths include null char */
+
+#define VIC_PROVINFO_CISCO_OUI		{ 0x00, 0x00, 0x0c }
+#define VIC_PROVINFO_LINUX_TYPE		0x2
+
+enum vic_linux_prov_tlv_type {
+	VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR = 0,
+	VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR = 1,			/* u8[6] */
+	VIC_LINUX_PROV_TLV_CLIENT_NAME_STR = 2,
+	VIC_LINUX_PROV_TLV_HOST_UUID_STR = 8,
+	VIC_LINUX_PROV_TLV_CLIENT_UUID_STR = 9,
+};
+
+struct vic_provinfo {
+	u8 oui[3];		/* OUI of data provider */
+	u8 type;		/* provider-specific type */
+	u32 length;		/* length of data below */
+	u32 num_tlvs;		/* number of tlvs */
+	struct vic_provinfo_tlv {
+		u16 type;
+		u16 length;
+		u8 value[0];
+	} tlv[0];
+} __attribute__ ((packed));
+
+#define VIC_PROVINFO_MAX_DATA		1385
+#define VIC_PROVINFO_MAX_TLV_DATA (VIC_PROVINFO_MAX_DATA - \
+	sizeof(struct vic_provinfo))
+
+struct vic_provinfo *vic_provinfo_alloc(gfp_t flags, u8 *oui, u8 type);
+void vic_provinfo_free(struct vic_provinfo *vp);
+int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
+	void *value);
+size_t vic_provinfo_size(struct vic_provinfo *vp);
+
+#endif	/* _VNIC_VIC_H_ */


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl)
  2010-04-28  4:42 [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl) Scott Feldman
  2010-04-28  4:42 ` [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics Scott Feldman
@ 2010-04-28 13:13 ` Arnd Bergmann
  2010-04-28 17:51   ` Scott Feldman
  2010-04-28 18:54   ` Scott Feldman
  1 sibling, 2 replies; 20+ messages in thread
From: Arnd Bergmann @ 2010-04-28 13:13 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw

On Wednesday 28 April 2010, Scott Feldman wrote:
> From: Scott Feldman <scofeldm@cisco.com>
> 
> Add new netdev ops ndo_{set|get}_port_profile to allow setting of port-profile
> on a netdev interface.  Extends RTM_SETLINK/RTM_GETLINK with new sub cmd called
> IFLA_PORT_PROFILE (added to end of IFLA_cmd list).  The port-profile cmd
> arguments are (as seen from modified iproute2 cmdline):
>
>        ip link set DEVICE [ { up | down } ]
>                           [ arp { on | off } ]
>                           [ dynamic { on | off } ]
>                           [ multicast { on | off } ]
>                           ...
>                           [ vf NUM [ mac LLADDR ]
>                                    [ vlan VLANID [ qos VLAN-QOS ] ]
>                                    [ rate TXRATE ] ] 
>                           [ port_profile [ PORT-PROFILE
>                                    [ mac LLADDR ]
>                                    [ host_uuid HOST_UUID ]
>                                    [ client_uuid CLIENT_UUID ]
>                                    [ client_name CLIENT_NAME ] ] ]
>        ip link show [ DEVICE ]

We will need a few more options to cover draft VDP in addition to the protocol
your NIC is using. I still think it's possible to use the same interface
for both, but the differences are obviously showing.

The missing bits that I can see so far are:

- You only have 'get' and 'set'. We will also need a 'unset' or 'delete'
  option in order to get rid of a port profile association.
- VDP has three different ways to 'set' a port profile: 'associate',
  'pre-associate with resource reservation' and 'pre-associate without
  resource reservation'. This could become an extra option flag.
- Instead of a port profile name, VDP specifies a tuple like
   struct vsi_associate {
	unsigned char VSI_Mgr_ID;       /* VSI manager ID */
	unsigned char VSI_Type_ID[3];   /* 24 bit VSI Type ID */
	unsigned char VSI_Type_Version; /* VSI Type version */
   };
   I'm not sure how to deal with that best, but there needs to be
   some parsing of these numbers.
- VDP requires a vlan ID to be part of the association, in addition to
  the MAC address. In theory, we could have multiple tuples of MAC+VLAN
  addresses, but we can probably just associate each tuple separately
  and ignore that part of the standard.
- we have a set of possible error conditions that can be returned by
  the switch (invalid format, insufficient resources, unknown VTID,
  VTID violation, VTID verison violation, out of sync). It should be
  possible to return each of these to the user with 'get'.

> Since we're using netlink sockets, the receiver of the RTM_SETLINK msg can
> be in kernel- or user-space.  For kernel-space recipient, rtnetlink.c, the
> new ndo_set_port_profile netdev op is called to set the port-profile.
> User-space recipients can decide how they propagate the msg to the switch.
> There is also a RTM_GETLINK cmd to to return port-profile setting of an
> interface and to also return the status of the last port-profile.

More on a stylistic note, I'm not convinced that using RTM_SETLINK/GETLINK
is the right interface for this, unlike the 'ip link set DEV vf ...' stuff,
because it seems to suggest that this is an option of the adapter itself.
I actually liked the iovnl family better in this regard, because it kept
the protocols separate.

What I could imagine to unify this is something like

   ip port_profile set DEVICE [ { pre_associate | pre_associate_rr } ]
                              { name PORT-PROFILE | vsi MGR:VTID:VER }
                                    mac LLADDR
				    [ vlan VID ]
                                    [ host_uuid HOST_UUID ]
                                    [ client_uuid CLIENT_UUID ]
                                    [ client_name CLIENT_NAME ]
   ip port_profile del DEVICE [ mac LLADDR [ vlan VID ] ]
   ip port_profile show DEVICE

	Arnd

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-28  4:42 ` [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics Scott Feldman
@ 2010-04-28 13:32   ` Arnd Bergmann
  2010-04-28 18:39     ` Scott Feldman
  0 siblings, 1 reply; 20+ messages in thread
From: Arnd Bergmann @ 2010-04-28 13:32 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw

On Wednesday 28 April 2010, Scott Feldman wrote:
> +static int enic_set_port_profile(struct net_device *netdev,
> +       struct ifla_port_profile *ipp)
> +{
> +       struct enic *enic = netdev_priv(netdev);
> +       struct vic_provinfo *vp;
> +       u8 oui[3] = VIC_PROVINFO_CISCO_OUI;
> +       u8 *mac = ipp->mac;
> +       int err;
> +
> +       memset(&enic->port_profile, 0, sizeof(enic->port_profile));
> +
> +       if (!enic_is_dynamic(enic))
> +               return -EOPNOTSUPP;

Not sure I understand how this fits together. You said in an earlier mail:

> > Anything that ties port profiles to VFs seems fundamentally flawed AFAICT,
> > at least when we want to extend this to adapters that don't do it in firmware.
>
> Ya, I tend I agree.  Let's just make port-profile a setting of any netdev,
> an eth, macvtap, eth.x, bond, etc.  That's probably what I should have done
> in the first place.  Something like:

I thought you had meant that we can do the association of attached interfaces
through any interface, rather than tying it to the slave interface. While I'm
not sure I read your code correctly, it seems like you now only talk to the
slave interface, not to the master at all!

At least the check above should be 'if (enic_is_dynamic(enic)) return -EOPNOTSUPP',
not the other way round.
Moreover, if the netdev is the master here, you only allow a single slave, which
is not enough for larger setups (n > 1), though that could be a limitation of your
first version.

Passing just the slave device however would not work in the general case, as I
tried to point out in the mail you replied to. If the slave interface is owned
by a guest using PCI passthrough, or it sits below a stack of nested interfaces
(vlan, bridge, tap, vhost, ...), it's impossible to know what interface is
responsible for setting up the slave. Note that you cannot perform the association
through the slave interface itself because the remote switch would discard any
traffic originating from an unassociated interface.

> +static int enic_get_port_profile(struct net_device *netdev,
> +       struct ifla_port_profile *ipp)
> +{
> +       struct enic *enic = netdev_priv(netdev);
> +       int done, err, error;
> +
> +       enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_UNKNOWN;
> +
> +       spin_lock(&enic->devcmd_lock);
> +       err = vnic_dev_init_done(enic->vdev, &done, &error);
> +       spin_unlock(&enic->devcmd_lock);
> +
> +       if (err || error)
> +               enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_ERROR;
> +
> +       if (!done)
> +               enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_INPROGRESS;
> +
> +       if (!error)
> +               enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_SUCCESS;
> +
> +       memcpy(ipp, &enic->port_profile, sizeof(enic->port_profile));
> +
> +       return 0;
> +}
> +

Similarly, this interface only passes back a single port profile association,
where it should have a way to return all of the slave ports.

	Arnd

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl)
  2010-04-28 13:13 ` [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl) Arnd Bergmann
@ 2010-04-28 17:51   ` Scott Feldman
  2010-04-28 19:33     ` Arnd Bergmann
  2010-04-28 18:54   ` Scott Feldman
  1 sibling, 1 reply; 20+ messages in thread
From: Scott Feldman @ 2010-04-28 17:51 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw

On 4/28/10 6:13 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> On Wednesday 28 April 2010, Scott Feldman wrote:
>>        ip link set DEVICE [ { up | down } ]
>>                           [ arp { on | off } ]
>>                           [ dynamic { on | off } ]
>>                           [ multicast { on | off } ]
>>                           ...
>>                           [ vf NUM [ mac LLADDR ]
>>                                    [ vlan VLANID [ qos VLAN-QOS ] ]
>>                                    [ rate TXRATE ] ]
>>                           [ port_profile [ PORT-PROFILE
>>                                    [ mac LLADDR ]
>>                                    [ host_uuid HOST_UUID ]
>>                                    [ client_uuid CLIENT_UUID ]
>>                                    [ client_name CLIENT_NAME ] ] ]
>>        ip link show [ DEVICE ]
> 
> We will need a few more options to cover draft VDP in addition to the protocol
> your NIC is using. I still think it's possible to use the same interface
> for both, but the differences are obviously showing.
> 
> The missing bits that I can see so far are:
> 
> - You only have 'get' and 'set'. We will also need a 'unset' or 'delete'
>   option in order to get rid of a port profile association.

That's there in my patch.  If you don't specify anything after port-profile
keyword, it's an unset.  See extra "[" and "]" above.  Will that work for
you?

> - VDP has three different ways to 'set' a port profile: 'associate',
>   'pre-associate with resource reservation' and 'pre-associate without
>   resource reservation'. This could become an extra option flag.

Ok, let's add an option flag bit field.  I'm not sure how that looks from
the iproute2 cmd line.  Would you take a stab at defining these?

> - Instead of a port profile name, VDP specifies a tuple like
>    struct vsi_associate {
> unsigned char VSI_Mgr_ID;       /* VSI manager ID */
> unsigned char VSI_Type_ID[3];   /* 24 bit VSI Type ID */
> unsigned char VSI_Type_Version; /* VSI Type version */
>    };
>    I'm not sure how to deal with that best, but there needs to be
>    some parsing of these numbers.

PORT-PROFILE above is a u8* array.  You could decide to encode the tuple in
a string, e.g. "1.12345.1", and let the receiver parse it?  Or pack it in as
binary.  PORT-PROFILE for us is just a string identifier, e.g. "corp-net-10"
or "joes-garage".

> - VDP requires a vlan ID to be part of the association, in addition to
>   the MAC address. In theory, we could have multiple tuples of MAC+VLAN
>   addresses, but we can probably just associate each tuple separately
>   and ignore that part of the standard.

I don't think I have enough information to spec this out for this item.
Would you take a stab at how this would look in the struct and how it would
look from the iproute2 cmd line?  (Note I'm using iproute2 cmd line for
illustrative purposes, but the sender of the msg could be something like
libvirt).

> - we have a set of possible error conditions that can be returned by
>   the switch (invalid format, insufficient resources, unknown VTID,
>   VTID violation, VTID verison violation, out of sync). It should be
>   possible to return each of these to the user with 'get'.

There is a status code in the get cmd as defined in my patch.  I have it as
a u8 with some enum codes.  Can we add to the enum code list?  Or do you
want to return a full string?  Our requirements are we return one of:
{success, error, in-progress}.
 
>> Since we're using netlink sockets, the receiver of the RTM_SETLINK msg can
>> be in kernel- or user-space.  For kernel-space recipient, rtnetlink.c, the
>> new ndo_set_port_profile netdev op is called to set the port-profile.
>> User-space recipients can decide how they propagate the msg to the switch.
>> There is also a RTM_GETLINK cmd to to return port-profile setting of an
>> interface and to also return the status of the last port-profile.
> 
> More on a stylistic note, I'm not convinced that using RTM_SETLINK/GETLINK
> is the right interface for this, unlike the 'ip link set DEV vf ...' stuff,
> because it seems to suggest that this is an option of the adapter itself.
> I actually liked the iovnl family better in this regard, because it kept
> the protocols separate.

Wait a second...I abandoned iovnl and moved to if_link based on suggestions
from you and others.  On 4/21/10 2:13 PM, "Arnd Bergmann" <arnd@arndb.de>
wrote:

   > Right. My preference would probably be make these a subcategory of
   > the if_link, and use the existing RTM_NEWLINK/RTM_DELLINK commands.

My latest with if_link fits best with what we're trying to do with enic.
Note the current interface is per netdev interface (a link), where the
netdev interface could be the adapter itself or any other netdev such as
macvlan, bond, etc.
 
> What I could imagine to unify this is something like
> 
>    ip port_profile set DEVICE [ { pre_associate | pre_associate_rr } ]
>                               { name PORT-PROFILE | vsi MGR:VTID:VER }
>                                     mac LLADDR
>    [ vlan VID ]
>                                     [ host_uuid HOST_UUID ]
>                                     [ client_uuid CLIENT_UUID ]
>                                     [ client_name CLIENT_NAME ]
>    ip port_profile del DEVICE [ mac LLADDR [ vlan VID ] ]
>    ip port_profile show DEVICE

If we want to break port_profile out into it's own ip cmd, I'm ok with that.
What you have above would work for enic.  The netdev would have these ops:

    ndo_set_port_profile
    ndo_get_port_profile
    ndo_del_port_profile

Sounds OK?

-scott


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-28 13:32   ` Arnd Bergmann
@ 2010-04-28 18:39     ` Scott Feldman
  2010-04-28 19:16       ` Arnd Bergmann
  0 siblings, 1 reply; 20+ messages in thread
From: Scott Feldman @ 2010-04-28 18:39 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw

On 4/28/10 6:32 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> On Wednesday 28 April 2010, Scott Feldman wrote:
>> +static int enic_set_port_profile(struct net_device *netdev,
>> +       struct ifla_port_profile *ipp)
>> +{
>> +       struct enic *enic = netdev_priv(netdev);
>> +       struct vic_provinfo *vp;
>> +       u8 oui[3] = VIC_PROVINFO_CISCO_OUI;
>> +       u8 *mac = ipp->mac;
>> +       int err;
>> +
>> +       memset(&enic->port_profile, 0, sizeof(enic->port_profile));
>> +
>> +       if (!enic_is_dynamic(enic))
>> +               return -EOPNOTSUPP;
> 
> Not sure I understand how this fits together. You said in an earlier mail:
> 
>>> Anything that ties port profiles to VFs seems fundamentally flawed AFAICT,
>>> at least when we want to extend this to adapters that don't do it in
>>> firmware.
>> 
>> Ya, I tend I agree.  Let's just make port-profile a setting of any netdev,
>> an eth, macvtap, eth.x, bond, etc.  That's probably what I should have done
>> in the first place.  Something like:
> 
> I thought you had meant that we can do the association of attached interfaces
> through any interface, rather than tying it to the slave interface. While I'm
> not sure I read your code correctly, it seems like you now only talk to the
> slave interface, not to the master at all!
> 
> At least the check above should be 'if (enic_is_dynamic(enic)) return
> -EOPNOTSUPP', not the other way round.
> Moreover, if the netdev is the master here, you only allow a single slave,
> which is not enough for larger setups (n > 1), though that could be a
> limitation of your first version.

The code is correct.  I probably confused you with earlier patches trying to
accommodate master/slave devices and you might have assumed enic was such a
device.  But it's not.  For enic, there are two device IDs, let's call one
"static" and the other "dynamic".  The only difference between the two is
static enics load up fully ready to go just like a normal nic, whereas
dynamic enics load up but can't yet pass traffic because they're not
"plugged in" to the network.  To plug them in, you need to associate a
port-profile.  The physical analogy is this: server admin tells network
admin: plug my nic into a switch port with these characteristics.  Here, the
port-profile describes those switch port characteristics.  Now, there is no
master/slave relationship between static and dynamic enics.  There could be
with a simple firmware update, but it's not there today.  Also, I want to
point out that a single phys Cisco nic can be provisioned to expose many
static and/or many dynamic enics to the host.  On the order of 100s.  The
code above is to block port-profile association on static enics.  Static
enics where already provisioned on the network when created so there is no
need for a port-profile push from the host.

> Passing just the slave device however would not work in the general case, as I
> tried to point out in the mail you replied to. If the slave interface is owned
> by a guest using PCI passthrough, or it sits below a stack of nested
> interfaces
> (vlan, bridge, tap, vhost, ...), it's impossible to know what interface is
> responsible for setting up the slave.

For port-profile, we want to pass the device that is to be "plugged-in" to
the network based on port-profile association.  This is the device that
gives basic connectivity to the guest interface, regardless of how the guest
interface is wired to the device.  It could be direct PCI pass-thru, macvtap
stack, some yet-to-be-invented kernel-bypass stack, etc.

> Note that you cannot perform the association
> through the slave interface itself because the remote switch would discard any
> traffic originating from an unassociated interface.

That's not a limitation of our device/switch.

-scott

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl)
  2010-04-28 13:13 ` [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl) Arnd Bergmann
  2010-04-28 17:51   ` Scott Feldman
@ 2010-04-28 18:54   ` Scott Feldman
  2010-04-28 19:37     ` Arnd Bergmann
  1 sibling, 1 reply; 20+ messages in thread
From: Scott Feldman @ 2010-04-28 18:54 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw

On 4/28/10 6:13 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> What I could imagine to unify this is something like
> 
>    ip port_profile set DEVICE [ { pre_associate | pre_associate_rr } ]
>                               { name PORT-PROFILE | vsi MGR:VTID:VER }
>                                     mac LLADDR
>    [ vlan VID ]
>                                     [ host_uuid HOST_UUID ]
>                                     [ client_uuid CLIENT_UUID ]
>                                     [ client_name CLIENT_NAME ]
>    ip port_profile del DEVICE [ mac LLADDR [ vlan VID ] ]
>    ip port_profile show DEVICE

Arnd, can someone test this with VDP today?  I don't have access to that
equipment so it's difficult for me to fully test the unified patch.  I can
test the previous patch with enic easily because I have access to production
systems.  I'd like to make sure someone can test this with VDP before I
respin the patch one more time.

-scott


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-28 18:39     ` Scott Feldman
@ 2010-04-28 19:16       ` Arnd Bergmann
  2010-04-28 22:38         ` Scott Feldman
  0 siblings, 1 reply; 20+ messages in thread
From: Arnd Bergmann @ 2010-04-28 19:16 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw

On Wednesday 28 April 2010, Scott Feldman wrote:
> > I thought you had meant that we can do the association of attached interfaces
> > through any interface, rather than tying it to the slave interface. While I'm
> > not sure I read your code correctly, it seems like you now only talk to the
> > slave interface, not to the master at all!
> > 
> > At least the check above should be 'if (enic_is_dynamic(enic)) return
> > -EOPNOTSUPP', not the other way round.
> > Moreover, if the netdev is the master here, you only allow a single slave,
> > which is not enough for larger setups (n > 1), though that could be a
> > limitation of your first version.
> 
> The code is correct.  I probably confused you with earlier patches trying to
> accommodate master/slave devices and you might have assumed enic was such a
> device.  But it's not.  For enic, there are two device IDs, let's call one
> "static" and the other "dynamic".  The only difference between the two is
> static enics load up fully ready to go just like a normal nic, whereas
> dynamic enics load up but can't yet pass traffic because they're not
> "plugged in" to the network.  To plug them in, you need to associate a
> port-profile.  The physical analogy is this: server admin tells network
> admin: plug my nic into a switch port with these characteristics.  Here, the
> port-profile describes those switch port characteristics.  Now, there is no
> master/slave relationship between static and dynamic enics.  There could be
> with a simple firmware update, but it's not there today.  Also, I want to
> point out that a single phys Cisco nic can be provisioned to expose many
> static and/or many dynamic enics to the host.  On the order of 100s.  The
> code above is to block port-profile association on static enics.  Static
> enics where already provisioned on the network when created so there is no
> need for a port-profile push from the host.

Ok, I see. I had asked this before, but never got a definite reply on this.

> > Passing just the slave device however would not work in the general case, as I
> > tried to point out in the mail you replied to. If the slave interface is owned
> > by a guest using PCI passthrough, or it sits below a stack of nested
> > interfaces
> > (vlan, bridge, tap, vhost, ...), it's impossible to know what interface is
> > responsible for setting up the slave.
> 
> For port-profile, we want to pass the device that is to be "plugged-in" to
> the network based on port-profile association.  This is the device that
> gives basic connectivity to the guest interface, regardless of how the guest
> interface is wired to the device.  It could be direct PCI pass-thru, macvtap
> stack, some yet-to-be-invented kernel-bypass stack, etc.

But if the device is already passed to the guest using pass-thru or containers,
you would no longer to query or change the port profile, because it is no
longer visible in the host, right?

> > Note that you cannot perform the association
> > through the slave interface itself because the remote switch would discard any
> > traffic originating from an unassociated interface.
> 
> That's not a limitation of our device/switch.

This seems to contradict what you write above, at least when you drop the
assumption that the protocol is implemented in the NIC firmware.
The switch obviously does not care about the interface name in Linux or
any of its data structures. What it cares about instead is the traffic on
the wire and which of its ports this takes place on.

When you create a new dynamic enic device or a macvtap port but not assoicate
it, the switch cannot allow this device to send or receive any traffic itself,
as you write above (not 'plugged in'). The application (or firmware, for that
matter) therefore needs to talk to the switch over an interface that is already
associated. With VDP, this is the base device that a VEPA port is created from,
i.e. the one that talks LLDP to the switch, i.e. the one that comes up at boot
time when you have no virtualization and plug into a dumb switch.

I assumed that this was a specific PF in your NIC,  but it now sounds like it
could be an internal device that is only visible in your firmware and not exposed
as a network interface in Linux, right?

Your firmware can obviously find out the right communication channel for
a associating a dynamic interface with the switch, but when this is implemnted
in software, we cannot generally know that and rely on getting access to the
interface that lets us talk to the switch. The information which interface
is getting associated however is completely useless to an implementation like
this.

	Arnd

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl)
  2010-04-28 17:51   ` Scott Feldman
@ 2010-04-28 19:33     ` Arnd Bergmann
  0 siblings, 0 replies; 20+ messages in thread
From: Arnd Bergmann @ 2010-04-28 19:33 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw

On Wednesday 28 April 2010, Scott Feldman wrote:
> On 4/28/10 6:13 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:
> > 
> > We will need a few more options to cover draft VDP in addition to the protocol
> > your NIC is using. I still think it's possible to use the same interface
> > for both, but the differences are obviously showing.
> > 
> > The missing bits that I can see so far are:
> > 
> > - You only have 'get' and 'set'. We will also need a 'unset' or 'delete'
> >   option in order to get rid of a port profile association.
> 
> That's there in my patch.  If you don't specify anything after port-profile
> keyword, it's an unset.  See extra "[" and "]" above.  Will that work for
> you?

Well, this won't work if you want to have multiple slave interfaces connected
to a single master and record the port profile in the master.

> > - VDP has three different ways to 'set' a port profile: 'associate',
> >   'pre-associate with resource reservation' and 'pre-associate without
> >   resource reservation'. This could become an extra option flag.
> 
> Ok, let's add an option flag bit field.  I'm not sure how that looks from
> the iproute2 cmd line.  Would you take a stab at defining these?
> 
> > - Instead of a port profile name, VDP specifies a tuple like
> >    struct vsi_associate {
> > unsigned char VSI_Mgr_ID;       /* VSI manager ID */
> > unsigned char VSI_Type_ID[3];   /* 24 bit VSI Type ID */
> > unsigned char VSI_Type_Version; /* VSI Type version */
> >    };
> >    I'm not sure how to deal with that best, but there needs to be
> >    some parsing of these numbers.
> 
> PORT-PROFILE above is a u8* array.  You could decide to encode the tuple in
> a string, e.g. "1.12345.1", and let the receiver parse it?  Or pack it in as
> binary.  PORT-PROFILE for us is just a string identifier, e.g. "corp-net-10"
> or "joes-garage".

I guess either one would work, but I'd prefer to do the parsing at
the front-end and passing it as binary to avoid libvirt encoding
it as ascii and lldpad (or the kernel) parsing the data again, which
would be more error-prone.

Another alternative would be to make this a distinct argument (or even
three of them) and only allow passing one or the other. That would
also implicitly choose the protocol (VDP or port extender).
 
> > - VDP requires a vlan ID to be part of the association, in addition to
> >   the MAC address. In theory, we could have multiple tuples of MAC+VLAN
> >   addresses, but we can probably just associate each tuple separately
> >   and ignore that part of the standard.
> 
> I don't think I have enough information to spec this out for this item.
> Would you take a stab at how this would look in the struct and how it would
> look from the iproute2 cmd line?  (Note I'm using iproute2 cmd line for
> illustrative purposes, but the sender of the msg could be something like
> libvirt).

Just adding the VID would be done I wrote at the end of my last mail.
Adding multiple VLAN/MAC pairs is probably not necessary if we can
do multiple associations.

> > - we have a set of possible error conditions that can be returned by
> >   the switch (invalid format, insufficient resources, unknown VTID,
> >   VTID violation, VTID verison violation, out of sync). It should be
> >   possible to return each of these to the user with 'get'.
> 
> There is a status code in the get cmd as defined in my patch.  I have it as
> a u8 with some enum codes.  Can we add to the enum code list?  Or do you
> want to return a full string?  Our requirements are we return one of:
> {success, error, in-progress}.

enum is fine, but I think it would be good to use the same numbers
as the VDP standard where possible. Maybe we could use two bytes,
the first one for the overall status (success, error, in progress)
and the second one for the specific error (as above)?

> >> Since we're using netlink sockets, the receiver of the RTM_SETLINK msg can
> >> be in kernel- or user-space.  For kernel-space recipient, rtnetlink.c, the
> >> new ndo_set_port_profile netdev op is called to set the port-profile.
> >> User-space recipients can decide how they propagate the msg to the switch.
> >> There is also a RTM_GETLINK cmd to to return port-profile setting of an
> >> interface and to also return the status of the last port-profile.
> > 
> > More on a stylistic note, I'm not convinced that using RTM_SETLINK/GETLINK
> > is the right interface for this, unlike the 'ip link set DEV vf ...' stuff,
> > because it seems to suggest that this is an option of the adapter itself.
> > I actually liked the iovnl family better in this regard, because it kept
> > the protocols separate.
> 
> Wait a second...I abandoned iovnl and moved to if_link based on suggestions
> from you and others.  On 4/21/10 2:13 PM, "Arnd Bergmann" <arnd@arndb.de>
> wrote:
>
>    > Right. My preference would probably be make these a subcategory of
>    > the if_link, and use the existing RTM_NEWLINK/RTM_DELLINK commands.
> 
> My latest with if_link fits best with what we're trying to do with enic.

Sorry for the misunderstanding, I was probably not clear enough. This
was a side-discussion about how we should create and destroy virtual
interfaces on IOV capable adapters. My point was that this should be
_separate_ from the port profile association. For creating a
macvlan/macvtap device, we already use RTM_NEWLINK, which does not
associate the device with the switch, so I suggested that for creating
a virtual interface with hardware support we should do the same thing,
but leave the association somewhere else. iovnl sounds like a good
place for that.

> Note the current interface is per netdev interface (a link), where the
> netdev interface could be the adapter itself or any other netdev such as
> macvlan, bond, etc.

We certainly missed each others arguments here, see my other mail.
In order to do the port profile association from software, we
definitely need the master device (nic, PF, bond, bridge, ...) so
we have a way to communicate to the switch, not the slave device
(VF, tap, macvtap, ...) that we are trying to associate.

> > What I could imagine to unify this is something like
> > 
> >    ip port_profile set DEVICE [ { pre_associate | pre_associate_rr } ]
> >                               { name PORT-PROFILE | vsi MGR:VTID:VER }
> >                                     mac LLADDR
> >    [ vlan VID ]
> >                                     [ host_uuid HOST_UUID ]
> >                                     [ client_uuid CLIENT_UUID ]
> >                                     [ client_name CLIENT_NAME ]
> >    ip port_profile del DEVICE [ mac LLADDR [ vlan VID ] ]
> >    ip port_profile show DEVICE
> 
> If we want to break port_profile out into it's own ip cmd, I'm ok with that.
> What you have above would work for enic.  The netdev would have these ops:
> 
>     ndo_set_port_profile
>     ndo_get_port_profile
>     ndo_del_port_profile
> 
> Sounds OK?

That sounds good.

	Arnd

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl)
  2010-04-28 18:54   ` Scott Feldman
@ 2010-04-28 19:37     ` Arnd Bergmann
  0 siblings, 0 replies; 20+ messages in thread
From: Arnd Bergmann @ 2010-04-28 19:37 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw, Jens Osterkamp

On Wednesday 28 April 2010, Scott Feldman wrote:
> On 4/28/10 6:13 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:
> 
> > What I could imagine to unify this is something like
> > 
> >    ip port_profile set DEVICE [ { pre_associate | pre_associate_rr } ]
> >                               { name PORT-PROFILE | vsi MGR:VTID:VER }
> >                                     mac LLADDR
> >    [ vlan VID ]
> >                                     [ host_uuid HOST_UUID ]
> >                                     [ client_uuid CLIENT_UUID ]
> >                                     [ client_name CLIENT_NAME ]
> >    ip port_profile del DEVICE [ mac LLADDR [ vlan VID ] ]
> >    ip port_profile show DEVICE
> 
> Arnd, can someone test this with VDP today?  I don't have access to that
> equipment so it's difficult for me to fully test the unified patch.  I can
> test the previous patch with enic easily because I have access to production
> systems.  I'd like to make sure someone can test this with VDP before I
> respin the patch one more time.

Sorry, but I don't have access to production hardware at this time. Jens wants to
implement both sides so we can test this in simulation mode, but it's not done
yet.

	Arnd

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-28 19:16       ` Arnd Bergmann
@ 2010-04-28 22:38         ` Scott Feldman
  2010-04-29 12:27           ` Arnd Bergmann
  0 siblings, 1 reply; 20+ messages in thread
From: Scott Feldman @ 2010-04-28 22:38 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw

On 4/28/10 12:16 PM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> On Wednesday 28 April 2010, Scott Feldman wrote:

>>> Passing just the slave device however would not work in the general case, as
>>> I tried to point out in the mail you replied to. If the slave interface is
>>> owned by a guest using PCI passthrough, or it sits below a stack of nested
>>> interfaces (vlan, bridge, tap, vhost, ...), it's impossible to know what
>>> interface is responsible for setting up the slave.
>> 
>> For port-profile, we want to pass the device that is to be "plugged-in" to
>> the network based on port-profile association.  This is the device that
>> gives basic connectivity to the guest interface, regardless of how the guest
>> interface is wired to the device.  It could be direct PCI pass-thru, macvtap
>> stack, some yet-to-be-invented kernel-bypass stack, etc.
> 
> But if the device is already passed to the guest using pass-thru or
> containers, you would no longer to query or change the port profile,
> because it is no longer visible in the host, right?

Drats, I made a mistake here.  You're right, in the pass-thru case the host
lost control of the device, so we need another device to proxy the
port-profile for the pass-thru device.  I had this in the second patch
submission where I was trying to extend the SR-IOV if_link cmds to included
port-profile, but that got mired down in the VF discussions.
  
>>> Note that you cannot perform the association
>>> through the slave interface itself because the remote switch would discard
>>> any traffic originating from an unassociated interface.
>> 
>> That's not a limitation of our device/switch.
> 
> This seems to contradict what you write above, at least when you drop the
> assumption that the protocol is implemented in the NIC firmware.
> The switch obviously does not care about the interface name in Linux or
> any of its data structures. What it cares about instead is the traffic on
> the wire and which of its ports this takes place on.
> 
> When you create a new dynamic enic device or a macvtap port but not assoicate
> it, the switch cannot allow this device to send or receive any traffic itself,
> as you write above (not 'plugged in'). The application (or firmware, for that
> matter) therefore needs to talk to the switch over an interface that is
> already
> associated. With VDP, this is the base device that a VEPA port is created
> from,
> i.e. the one that talks LLDP to the switch, i.e. the one that comes up at boot
> time when you have no virtualization and plug into a dumb switch.
> 
> I assumed that this was a specific PF in your NIC,  but it now sounds like it
> could be an internal device that is only visible in your firmware and not
> exposed
> as a network interface in Linux, right?

Yes, that's right.  Without going into implementation details, assume any
enic has firmware with private mgmt channel to switch to do the equivalent
of your base device->LLDP->switch.

> Your firmware can obviously find out the right communication channel for
> a associating a dynamic interface with the switch, but when this is implemnted
> in software, we cannot generally know that and rely on getting access to the
> interface that lets us talk to the switch. The information which interface
> is getting associated however is completely useless to an implementation like
> this.

So we're kind of back to where we were with iovnl.  We need to specify both
devices, the base device that has access to the switch and the target device
to associate the port-profile with.  Something like:

   ip port_profile set DEVICE [ base DEVICE ] [ { pre_associate |
                                                  pre_associate_rr } ]
                              { name PORT-PROFILE | vsi MGR:VTID:VER }
                              mac LLADDR
                              [ vlan VID ]
                              [ host_uuid HOST_UUID ]
                              [ client_uuid CLIENT_UUID ]
                              [ client_name CLIENT_NAME ]
   ip port_profile del DEVICE [ base DEVICE ] [ mac LLADDR [ vlan VID ] ]
   ip port_profile show DEVICE [ base DEVICE ]

The netdev ops are (when netlink msg handled in kernel):

    ndo_set_port_profile(netdev *target, ...)
    ndo_get_port_profile(netdev *target, ...)
    ndo_del_port_profile(netdev *target, ...)

Base device is optional.  If base device is not given, then target device
gets netdev ops.  If base device is given, then base device gets netdev ops
and *target refers to target device.  This covers the following cases:

1. Current enic where base == target since target can communicate directly
with switch to associate port-profile.  This will not work for the enic
pass-thru case as noted earlier.  We get:

    ip port_profile set eth0 name joes-garage ...

And

    eth0:ndo_set_port_profile(NULL, ...)

2. Future enic for pass-thru case where base != target.  We get:

    ip port_profile set eth1 base eth0 name joes-garage ...

And

    eth0:ndi_set_port_profile(eth1, ...)

3. Future VEPA, we get:

    ip port_profile set eth11 base eth10 vsi 1:23456:7

And (here netlink msg handled in user-space):

    VDP msg sent on eth10 to set port-profile on eth11 using vis tuple
    

Does this work?  I want to get agreement before coding up patch attempt #4.

-scott



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-28 22:38         ` Scott Feldman
@ 2010-04-29 12:27           ` Arnd Bergmann
  2010-04-29 14:32             ` Scott Feldman
  0 siblings, 1 reply; 20+ messages in thread
From: Arnd Bergmann @ 2010-04-29 12:27 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw, Jens Osterkamp

On Thursday 29 April 2010, Scott Feldman wrote:
> On 4/28/10 12:16 PM, "Arnd Bergmann" <arnd@arndb.de> wrote:
> > On Wednesday 28 April 2010, Scott Feldman wrote:
> > 
> > But if the device is already passed to the guest using pass-thru or
> > containers, you would no longer to query or change the port profile,
> > because it is no longer visible in the host, right?
> 
> Drats, I made a mistake here.  You're right, in the pass-thru case the host
> lost control of the device, so we need another device to proxy the
> port-profile for the pass-thru device.  I had this in the second patch
> submission where I was trying to extend the SR-IOV if_link cmds to included
> port-profile, but that got mired down in the VF discussions.

ok
 
> > I assumed that this was a specific PF in your NIC,  but it now sounds like it
> > could be an internal device that is only visible in your firmware and not
> > exposed
> > as a network interface in Linux, right?
> 
> Yes, that's right.  Without going into implementation details, assume any
> enic has firmware with private mgmt channel to switch to do the equivalent
> of your base device->LLDP->switch.

Ok, now it all makes a *lot* more sense, thanks for the clarification!

For my curiosity, can you point to any documentation about what's actually
going on on the wire? Is it possible or planned to implement the same
protocol in Linux so you can do it with Cisco switches and cheap non-IOV
NICs?

> > Your firmware can obviously find out the right communication channel for
> > a associating a dynamic interface with the switch, but when this is implemnted
> > in software, we cannot generally know that and rely on getting access to the
> > interface that lets us talk to the switch. The information which interface
> > is getting associated however is completely useless to an implementation like
> > this.
> 
> So we're kind of back to where we were with iovnl.  We need to specify both
> devices, the base device that has access to the switch and the target device
> to associate the port-profile with.  Something like:
> 
>    ip port_profile set DEVICE [ base DEVICE ] [ { pre_associate |
>                                                   pre_associate_rr } ]
>                               { name PORT-PROFILE | vsi MGR:VTID:VER }
>                               mac LLADDR
>                               [ vlan VID ]
>                               [ host_uuid HOST_UUID ]
>                               [ client_uuid CLIENT_UUID ]
>                               [ client_name CLIENT_NAME ]
>    ip port_profile del DEVICE [ base DEVICE ] [ mac LLADDR [ vlan VID ] ]
>    ip port_profile show DEVICE [ base DEVICE ]
> 
> The netdev ops are (when netlink msg handled in kernel):
> 
>     ndo_set_port_profile(netdev *target, ...)
>     ndo_get_port_profile(netdev *target, ...)
>     ndo_del_port_profile(netdev *target, ...)
> 
> Base device is optional.  If base device is not given, then target device
> gets netdev ops.  If base device is given, then base device gets netdev ops
> and *target refers to target device.  This covers the following cases:

A bit more complicated than I had hoped for, but it sounds like the only
option that covers all corner cases so far.

> 1. Current enic where base == target since target can communicate directly
> with switch to associate port-profile.  This will not work for the enic
> pass-thru case as noted earlier.  We get:
> 
>     ip port_profile set eth0 name joes-garage ...
>
> And
> 
>     eth0:ndo_set_port_profile(NULL, ...)

Yes.
 
> 2. Future enic for pass-thru case where base != target.  We get:
> 
>     ip port_profile set eth1 base eth0 name joes-garage ...
> 
> And
> 
>     eth0:ndi_set_port_profile(eth1, ...)

Is eth1 the static device and eth0 the dynamic device in this scenario
or the other way round?

Wouldn't you still require access to both devices from the host root
network namespace here or do you just ignore the identifier for the
dynamic device here?
 
> 3. Future VEPA, we get:
> 
>     ip port_profile set eth11 base eth10 vsi 1:23456:7
> 
> And (here netlink msg handled in user-space):
> 
>     VDP msg sent on eth10 to set port-profile on eth11 using vis tuple
     
Yes. I'd prefer still requiring to pass the mac and vlan addresses in this
case, but seems workable.

> Does this work?  I want to get agreement before coding up patch attempt #4.

Seems ok for all I can see at this point, other than the complexity
that results from doing two network protocols through a single netlink
protocol. Maybe Jens and Chris can comment some more on this.

	Arnd

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-29 12:27           ` Arnd Bergmann
@ 2010-04-29 14:32             ` Scott Feldman
  2010-04-29 15:48               ` Arnd Bergmann
  0 siblings, 1 reply; 20+ messages in thread
From: Scott Feldman @ 2010-04-29 14:32 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw, Jens Osterkamp

On 4/29/10 5:27 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> On Thursday 29 April 2010, Scott Feldman wrote:
>> Yes, that's right.  Without going into implementation details, assume any
>> enic has firmware with private mgmt channel to switch to do the equivalent
>> of your base device->LLDP->switch.
> 
> Ok, now it all makes a *lot* more sense, thanks for the clarification!
> 
> For my curiosity, can you point to any documentation about what's actually
> going on on the wire?

I don't believe those links are available at this time.

> Is it possible or planned to implement the same protocol in Linux so you
> can do it with Cisco switches and cheap non-IOV NICs?

That seems very possible from a technical standpoint.  I don't think the
port-profile netlink API we're specing out excludes that option.

>>    ip port_profile set DEVICE [ base DEVICE ] [ { pre_associate |
>>                                                   pre_associate_rr } ]
>>                               { name PORT-PROFILE | vsi MGR:VTID:VER }

BTW, I was meaning to ask: is there a way to role the vsi tuple and the
flags up into a single identifier, say a string like PORT-PROFILE?  I'm
asking because it seems awkward from an admin's perspective to know how to
construct a vsi tuple or to know what pre_associate_rr means. I have to
admit I didn't fully grok what pre_associate_rr means myself.  Even if there
was a simple local database to map named port-profiles to the underlying
{vsi tuple, flags}, that would bring us closer to a more consistent user
interface.  Is this possible?

>> 2. Future enic for pass-thru case where base != target.  We get:
>> 
>>     ip port_profile set eth1 base eth0 name joes-garage ...
>> 
>> And
>> 
>>     eth0:ndi_set_port_profile(eth1, ...)
> 
> Is eth1 the static device and eth0 the dynamic device in this scenario
> or the other way round?

eth0 is the static and eth1 is the dynamic.  So eth0 is the base device.
(The PF in SR-IOV parlance).

> Wouldn't you still require access to both devices from the host root
> network namespace here or do you just ignore the identifier for the
> dynamic device here?

The dynamic device is the one to apply the port-profile to (we'll, I should
say to apply to the dynamic's devices switch port).  So we need the dynamic
device identified.

>> 3. Future VEPA, we get:
>> 
>>     ip port_profile set eth11 base eth10 vsi 1:23456:7
>> 
>> And (here netlink msg handled in user-space):
>> 
>>     VDP msg sent on eth10 to set port-profile on eth11 using vis tuple
>      
> Yes. I'd prefer still requiring to pass the mac and vlan addresses in this
> case, but seems workable.

Sorry, I forgot the "..." in this use-case.  I didn't mean to exclude the
other params in the example cmd line.

>> Does this work?  I want to get agreement before coding up patch attempt #4.
> 
> Seems ok for all I can see at this point, other than the complexity
> that results from doing two network protocols through a single netlink
> protocol. Maybe Jens and Chris can comment some more on this.

Ok, thanks Arnd.  I'll start coding this up now, hedging that the design is
set before hearing back from Jens/Chris.

-scott

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-29 14:32             ` Scott Feldman
@ 2010-04-29 15:48               ` Arnd Bergmann
  2010-04-29 16:31                 ` Scott Feldman
                                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Arnd Bergmann @ 2010-04-29 15:48 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw, Jens Osterkamp

On Thursday 29 April 2010, Scott Feldman wrote:
> On 4/29/10 5:27 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:
> 
> I don't believe those links are available at this time.
> 
> > Is it possible or planned to implement the same protocol in Linux so you
> > can do it with Cisco switches and cheap non-IOV NICs?
> 
> That seems very possible from a technical standpoint.  I don't think the
> port-profile netlink API we're specing out excludes that option.

Ok, good.

> >>    ip port_profile set DEVICE [ base DEVICE ] [ { pre_associate |
> >>                                                   pre_associate_rr } ]
> >>                               { name PORT-PROFILE | vsi MGR:VTID:VER }
> 
> BTW, I was meaning to ask: is there a way to role the vsi tuple and the
> flags up into a single identifier, say a string like PORT-PROFILE?  I'm
> asking because it seems awkward from an admin's perspective to know how to
> construct a vsi tuple or to know what pre_associate_rr means. I have to
> admit I didn't fully grok what pre_associate_rr means myself.  Even if there
> was a simple local database to map named port-profiles to the underlying
> {vsi tuple, flags}, that would bring us closer to a more consistent user
> interface.  Is this possible?

I think that's technically possible but may not be helpful to make the
user interface easier. Some background on pre-associate:

The purpose of this is to assist guest migration. A single VSI (i.e. guest
network adapter) may only be connected to a single switch port at any
given time. The VSI is identified by its UUID and it has a unique
MAC address.

When migrating a guest to a new hypervisor, we need to ask the switch
to associate that VSI at the destination switch port (which may or may
not be on the same different switch as the source port). This operation
may fail for a number of reasons and can take some time. Since we want
migration to alway succeed and take as little time as possible, we
do a pre-associate-with-resource-reservation before the migration and
only start the actual guest migration if that completes successfully.

After a successful pre-associate-with-resource-reservation step, we
know that the actual associate step will be both fast and successful.
After it completes, the VSI is known to be on the destination
and all traffic goes there (replacing the gratuitous ARP method we do
today).

I don't think we'd ever do a pre-associate without the
resource-reservation, but the standard defines both. In theory,
we could do a pre-associate at every switch in the data center
in order to find out if it's possible to migrate there.

If you want to have more details, please look at the draft spec at
http://www.ieee802.org/1/files/public/docs2010/bg-joint-evb-0410v1.pdf

> >> 2. Future enic for pass-thru case where base != target.  We get:
> >> 
> >>     ip port_profile set eth1 base eth0 name joes-garage ...
> >> 
> >> And
> >> 
> >>     eth0:ndi_set_port_profile(eth1, ...)
> > 
> > Is eth1 the static device and eth0 the dynamic device in this scenario
> > or the other way round?
> 
> eth0 is the static and eth1 is the dynamic.  So eth0 is the base device.
> (The PF in SR-IOV parlance).

ok.

> > Wouldn't you still require access to both devices from the host root
> > network namespace here or do you just ignore the identifier for the
> > dynamic device here?
> 
> The dynamic device is the one to apply the port-profile to (we'll, I should
> say to apply to the dynamic's devices switch port).  So we need the dynamic
> device identified.

What I mean is: how do you identify it when it belongs to someone else?
Do we always have a proxy netdev for an SR-IOV VF that is assigned to
the guest?

For the separate network namespace case, I guess we could still require
doing it before assigning the device to the guest namespace, but it's
still not ideal.

> >> Does this work?  I want to get agreement before coding up patch attempt #4.
> > 
> > Seems ok for all I can see at this point, other than the complexity
> > that results from doing two network protocols through a single netlink
> > protocol. Maybe Jens and Chris can comment some more on this.
> 
> Ok, thanks Arnd.  I'll start coding this up now, hedging that the design is
> set before hearing back from Jens/Chris.

I believe Chris is the one that was pushing most for having a single interface
for both VDP/LLDPAD and enic.
While I now understand your reasons for doing it in firmware and requiring the
kernel interface in addition to the user interface, my doubts on whether VDP
and your protocol should be part of the same interface are increasing.

While I'm convinced that you can make it work for both now, the alternative
to split the two may turn out to be cleaner. We'd still be able to do
either of the two in kernel or user space. Using iproute2 syntax to describe
this again, it would mean an interface like

   ip iov set  port-profile DEVICE [ base BASE-DEVICE ] name PORT-PROFILE
	                              [ host_uuid HOST_UUID ]
        	                      [ client_name CLIENT_NAME ]
                                      [ client_uuid CLIENT_UUID ]
   ip iov set  vsi { associate | pre-associate | pre-associate-rr } BASE-DEVICE
                                      vsi MGR:VTID:VER
                                      mac LLADDR [ vlan VID ]
                                      client_uuid CLIENT_UUID

   ip iov del  port_profile DEVICE      [ base BASE-DEVICE ]
   ip iov del  vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
				        [ client_uuid CLIENT_UUID ]

   ip iov show port_profile DEVICE      [ base BASE-DEVICE ]
   ip iov show vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
					[ client_uuid CLIENT_UUID ]

You would obvioulsy only implement the kernel support for the port-profile
stuff as callbacks, because no driver yet does VDP in the kernel, but we should
have a common netlink header that defines both variants.

Chris, any opinion on this interface as opposed to the combined one?
Either one should work, but splitting it seems cleaner to me.

	Arnd

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-29 15:48               ` Arnd Bergmann
@ 2010-04-29 16:31                 ` Scott Feldman
  2010-04-30 20:34                 ` Scott Feldman
  2010-05-03  4:29                 ` Vivek Kashyap
  2 siblings, 0 replies; 20+ messages in thread
From: Scott Feldman @ 2010-04-29 16:31 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw, Jens Osterkamp

On 4/29/10 8:48 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> I believe Chris is the one that was pushing most for having a single interface
> for both VDP/LLDPAD and enic.
> While I now understand your reasons for doing it in firmware and requiring the
> kernel interface in addition to the user interface, my doubts on whether VDP
> and your protocol should be part of the same interface are increasing.
> 
> While I'm convinced that you can make it work for both now, the alternative
> to split the two may turn out to be cleaner. We'd still be able to do
> either of the two in kernel or user space. Using iproute2 syntax to describe
> this again, it would mean an interface like
> 
>    ip iov set  port-profile DEVICE [ base BASE-DEVICE ] name PORT-PROFILE
>                              [ host_uuid HOST_UUID ]
>                      [ client_name CLIENT_NAME ]
>                                       [ client_uuid CLIENT_UUID ]
>    ip iov set  vsi { associate | pre-associate | pre-associate-rr }
> BASE-DEVICE
>                                       vsi MGR:VTID:VER
>                                       mac LLADDR [ vlan VID ]
>                                       client_uuid CLIENT_UUID
> 
>    ip iov del  port_profile DEVICE      [ base BASE-DEVICE ]
>    ip iov del  vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
>        [ client_uuid CLIENT_UUID ]
> 
>    ip iov show port_profile DEVICE      [ base BASE-DEVICE ]
>    ip iov show vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
> [ client_uuid CLIENT_UUID ]
> 
> You would obvioulsy only implement the kernel support for the port-profile
> stuff as callbacks, because no driver yet does VDP in the kernel, but we
> should
> have a common netlink header that defines both variants.
> 
> Chris, any opinion on this interface as opposed to the combined one?
> Either one should work, but splitting it seems cleaner to me.

I'm OK with either version.  Your latest does seem cleanest.  Let's let
Chris be the final decider.  Chris, door #1 or door #2?

-scott


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-29 15:48               ` Arnd Bergmann
  2010-04-29 16:31                 ` Scott Feldman
@ 2010-04-30 20:34                 ` Scott Feldman
  2010-05-01 12:36                   ` Arnd Bergmann
  2010-05-03  4:29                 ` Vivek Kashyap
  2 siblings, 1 reply; 20+ messages in thread
From: Scott Feldman @ 2010-04-30 20:34 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw, Jens Osterkamp

On 4/29/10 8:48 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> I believe Chris is the one that was pushing most for having a single interface
> for both VDP/LLDPAD and enic.
> While I now understand your reasons for doing it in firmware and requiring the
> kernel interface in addition to the user interface, my doubts on whether VDP
> and your protocol should be part of the same interface are increasing.
> 
> While I'm convinced that you can make it work for both now, the alternative
> to split the two may turn out to be cleaner. We'd still be able to do
> either of the two in kernel or user space. Using iproute2 syntax to describe
> this again, it would mean an interface like
> 
>    ip iov set  port-profile DEVICE [ base BASE-DEVICE ] name PORT-PROFILE
>                              [ host_uuid HOST_UUID ]
>                      [ client_name CLIENT_NAME ]
>                                       [ client_uuid CLIENT_UUID ]
>    ip iov set  vsi { associate | pre-associate | pre-associate-rr }
> BASE-DEVICE
>                                       vsi MGR:VTID:VER
>                                       mac LLADDR [ vlan VID ]
>                                       client_uuid CLIENT_UUID
> 
>    ip iov del  port_profile DEVICE      [ base BASE-DEVICE ]
>    ip iov del  vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
>        [ client_uuid CLIENT_UUID ]
> 
>    ip iov show port_profile DEVICE      [ base BASE-DEVICE ]
>    ip iov show vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
> [ client_uuid CLIENT_UUID ]
> 
> You would obvioulsy only implement the kernel support for the port-profile
> stuff as callbacks, because no driver yet does VDP in the kernel, but we
> should
> have a common netlink header that defines both variants.
> 
> Chris, any opinion on this interface as opposed to the combined one?
> Either one should work, but splitting it seems cleaner to me.

I haven't seen Chris's response, but it seems vger was down for awhile, so
maybe it's coming.  Assuming we go for the split design, we're still talking
about using RTM_SETLINK/RTM_GETLINK/RTM_DELLINK for these netlink msgs?  Or
are you suggesting by your cmd syntax that we return to
RTM_SETIOV/RTM_GETIOV like in the first iovnl patch?  RTM_SET/GET/DELLINK is
probably simplier, cleaner patch.

-scott


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-30 20:34                 ` Scott Feldman
@ 2010-05-01 12:36                   ` Arnd Bergmann
  0 siblings, 0 replies; 20+ messages in thread
From: Arnd Bergmann @ 2010-05-01 12:36 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw, Jens Osterkamp

On Friday 30 April 2010, Scott Feldman wrote:
> >    ip iov set  port-profile DEVICE [ base BASE-DEVICE ] name PORT-PROFILE
> >                              [ host_uuid HOST_UUID ]
> >                      [ client_name CLIENT_NAME ]
> >                                       [ client_uuid CLIENT_UUID ]
> >    ip iov set  vsi { associate | pre-associate | pre-associate-rr }
> > BASE-DEVICE
> >                                       vsi MGR:VTID:VER
> >                                       mac LLADDR [ vlan VID ]
> >                                       client_uuid CLIENT_UUID
> > 
> >    ip iov del  port_profile DEVICE      [ base BASE-DEVICE ]
> >    ip iov del  vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
> >        [ client_uuid CLIENT_UUID ]
> > 
> >    ip iov show port_profile DEVICE      [ base BASE-DEVICE ]
> >    ip iov show vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
> > [ client_uuid CLIENT_UUID ]
> > 
> > You would obvioulsy only implement the kernel support for the port-profile
> > stuff as callbacks, because no driver yet does VDP in the kernel, but we
> > should
> > have a common netlink header that defines both variants.
> > 
> > Chris, any opinion on this interface as opposed to the combined one?
> > Either one should work, but splitting it seems cleaner to me.
> 
> I haven't seen Chris's response, but it seems vger was down for awhile, so
> maybe it's coming.  Assuming we go for the split design, we're still talking
> about using RTM_SETLINK/RTM_GETLINK/RTM_DELLINK for these netlink msgs?  Or
> are you suggesting by your cmd syntax that we return to
> RTM_SETIOV/RTM_GETIOV like in the first iovnl patch?  RTM_SET/GET/DELLINK is
> probably simplier, cleaner patch.

In either case (split or combined), I would prefer the separate IOV
commands. The reason for this is that when support is not in the kernel,
it allows a cleaner separation between what's (always) handled in the
kernel and what's (potentially) done in user space.

	Arnd

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-04-29 15:48               ` Arnd Bergmann
  2010-04-29 16:31                 ` Scott Feldman
  2010-04-30 20:34                 ` Scott Feldman
@ 2010-05-03  4:29                 ` Vivek Kashyap
  2010-05-03 11:32                   ` Arnd Bergmann
  2 siblings, 1 reply; 20+ messages in thread
From: Vivek Kashyap @ 2010-05-03  4:29 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: Scott Feldman, davem, netdev, chrisw, Jens Osterkamp

>
>>>>    ip port_profile set DEVICE [ base DEVICE ] [ { pre_associate |
>>>>                                                   pre_associate_rr } ]
>>>>                               { name PORT-PROFILE | vsi MGR:VTID:VER }
>>
>> BTW, I was meaning to ask: is there a way to role the vsi tuple and the
>> flags up into a single identifier, say a string like PORT-PROFILE?  I'm
>> asking because it seems awkward from an admin's perspective to know how to
>> construct a vsi tuple or to know what pre_associate_rr means. I have to
>> admit I didn't fully grok what pre_associate_rr means myself.  Even if there
>> was a simple local database to map named port-profiles to the underlying
>> {vsi tuple, flags}, that would bring us closer to a more consistent user
>> interface.  Is this possible?
>
> I think that's technically possible but may not be helpful to make the
> user interface easier. Some background on pre-associate:
>
> The purpose of this is to assist guest migration. A single VSI (i.e. guest
> network adapter) may only be connected to a single switch port at any
> given time. The VSI is identified by its UUID and it has a unique
> MAC address.
>
> When migrating a guest to a new hypervisor, we need to ask the switch
> to associate that VSI at the destination switch port (which may or may
> not be on the same different switch as the source port). This operation
> may fail for a number of reasons and can take some time. Since we want
> migration to alway succeed and take as little time as possible, we
> do a pre-associate-with-resource-reservation before the migration and
> only start the actual guest migration if that completes successfully.
>
> After a successful pre-associate-with-resource-reservation step, we
> know that the actual associate step will be both fast and successful.
> After it completes, the VSI is known to be on the destination
> and all traffic goes there (replacing the gratuitous ARP method we do
> today).
>
> I don't think we'd ever do a pre-associate without the
> resource-reservation, but the standard defines both. In theory,
> we could do a pre-associate at every switch in the data center
> in order to find out if it's possible to migrate there.
>
> If you want to have more details, please look at the draft spec at
> http://www.ieee802.org/1/files/public/docs2010/bg-joint-evb-0410v1.pdf

The basic difference is that in 'pre-associate with resoruce reservation', the 
local buffers and resources needed for the eventual 'associate' are reserved
at the switch port.  Therefore the associate will not fail with 
'insufficient resources'. It might otherwise.

thanks
 	Vivek

> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-05-03  4:29                 ` Vivek Kashyap
@ 2010-05-03 11:32                   ` Arnd Bergmann
  2010-05-03 16:18                     ` Vivek Kashyap
  0 siblings, 1 reply; 20+ messages in thread
From: Arnd Bergmann @ 2010-05-03 11:32 UTC (permalink / raw)
  To: Vivek Kashyap; +Cc: Scott Feldman, davem, netdev, chrisw, Jens Osterkamp

On Monday 03 May 2010, Vivek Kashyap wrote:
> > After a successful pre-associate-with-resource-reservation step, we
> > know that the actual associate step will be both fast and successful.
> > After it completes, the VSI is known to be on the destination
> > and all traffic goes there (replacing the gratuitous ARP method we do
> > today).
> >
> > I don't think we'd ever do a pre-associate without the
> > resource-reservation, but the standard defines both. In theory,
> > we could do a pre-associate at every switch in the data center
> > in order to find out if it's possible to migrate there.
> >
> > If you want to have more details, please look at the draft spec at
> > http://www.ieee802.org/1/files/public/docs2010/bg-joint-evb-0410v1.pdf
> 
> The basic difference is that in 'pre-associate with resoruce reservation', the 
> local buffers and resources needed for the eventual 'associate' are reserved
> at the switch port.  Therefore the associate will not fail with 
> 'insufficient resources'. It might otherwise.

Yes, that's exactly what I wrote. So do you have any idea why we would
ever not want to do the resource reservation?

	Arnd

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
  2010-05-03 11:32                   ` Arnd Bergmann
@ 2010-05-03 16:18                     ` Vivek Kashyap
  0 siblings, 0 replies; 20+ messages in thread
From: Vivek Kashyap @ 2010-05-03 16:18 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: Scott Feldman, davem, netdev, chrisw, Jens Osterkamp

> Yes, that's exactly what I wrote. So do you have any idea why we would
> ever not want to do the resource reservation?

I agree that resource reservation is preferable during migration since it 
avoids failure due to lack of resources at the switch (after doing all the 
work of state transfer to the target system).

In other scenarios where the period of resource reservation is long, 
thereby diminishing the number of active vsi flows, the management 
entity might choose not to use resource reservation.

Vivek

>
> 	Arnd
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2010-05-03 16:19 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-28  4:42 [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl) Scott Feldman
2010-04-28  4:42 ` [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics Scott Feldman
2010-04-28 13:32   ` Arnd Bergmann
2010-04-28 18:39     ` Scott Feldman
2010-04-28 19:16       ` Arnd Bergmann
2010-04-28 22:38         ` Scott Feldman
2010-04-29 12:27           ` Arnd Bergmann
2010-04-29 14:32             ` Scott Feldman
2010-04-29 15:48               ` Arnd Bergmann
2010-04-29 16:31                 ` Scott Feldman
2010-04-30 20:34                 ` Scott Feldman
2010-05-01 12:36                   ` Arnd Bergmann
2010-05-03  4:29                 ` Vivek Kashyap
2010-05-03 11:32                   ` Arnd Bergmann
2010-05-03 16:18                     ` Vivek Kashyap
2010-04-28 13:13 ` [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl) Arnd Bergmann
2010-04-28 17:51   ` Scott Feldman
2010-04-28 19:33     ` Arnd Bergmann
2010-04-28 18:54   ` Scott Feldman
2010-04-28 19:37     ` Arnd Bergmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).