Netdev List
 help / color / mirror / Atom feed
* [PATCH 2/5] net: xdp: add invalid buffer warning
From: John Fastabend @ 2016-11-18 18:59 UTC (permalink / raw)
  To: tgraf, shm, alexei.starovoitov, daniel, davem
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer
In-Reply-To: <20161118185517.16137.92123.stgit@john-Precision-Tower-5810>

This adds a warning for drivers to use when encountering an invalid
buffer for XDP. For normal cases this should not happen but to catch
this in virtual/qemu setups that I may not have expected from the
emulation layer having a standard warning is useful.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 include/linux/filter.h |    1 +
 net/core/filter.c      |    6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1f09c52..0c79004 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -595,6 +595,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 void bpf_warn_invalid_xdp_action(u32 act);
+void bpf_warn_invalid_xdp_buffer(void);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
diff --git a/net/core/filter.c b/net/core/filter.c
index cd9e2ba..b8fb57c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2722,6 +2722,12 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+void bpf_warn_invalid_xdp_buffer(void)
+{
+	WARN_ONCE(1, "Illegal XDP buffer encountered, expect packet loss\n");
+}
+EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer);
+
 static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 					int src_reg, int ctx_off,
 					struct bpf_insn *insn_buf,

^ permalink raw reply related

* [PATCH 3/5] virtio_net: Add XDP support
From: John Fastabend @ 2016-11-18 19:00 UTC (permalink / raw)
  To: tgraf, shm, alexei.starovoitov, daniel, davem
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer
In-Reply-To: <20161118185517.16137.92123.stgit@john-Precision-Tower-5810>

From: Shrijeet Mukherjee <shrijeet@gmail.com>

This adds XDP support to virtio_net. Some requirements must be
met for XDP to be enabled depending on the mode. First it will
only be supported with LRO disabled so that data is not pushed
across multiple buffers. The MTU must be less than a page size
to avoid having to handle XDP across multiple pages.

If mergeable receive is enabled this first series only supports
the case where header and data are in the same buf which we can
check when a packet is received by looking at num_buf. If the
num_buf is greater than 1 and a XDP program is loaded the packet
is dropped and a warning is thrown. When any_header_sg is set this
does not happen and both header and data is put in a single buffer
as expected so we check this when XDP programs are loaded. Note I
have only tested this with Linux vhost backend.

If big packets mode is enabled and MTU/LRO conditions above are
met then XDP is allowed.

A follow on patch can be generated to solve the mergeable receive
case with num_bufs equal to 2. Buffers greater than two may not
be handled has easily.

Suggested-by: Shrijeet Mukherjee <shrijeet@gmail.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/virtio_net.c |  144 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 140 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0758cae..16c257d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_net.h>
+#include <linux/bpf.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
@@ -81,6 +82,8 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
+	struct bpf_prog *xdp_prog;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
@@ -324,6 +327,38 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static u32 do_xdp_prog(struct virtnet_info *vi,
+		       struct bpf_prog *xdp_prog,
+		       struct page *page, int offset, int len)
+{
+	int hdr_padded_len;
+	struct xdp_buff xdp;
+	u32 act;
+	u8 *buf;
+
+	buf = page_address(page) + offset;
+
+	if (vi->mergeable_rx_bufs)
+		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		hdr_padded_len = sizeof(struct padded_vnet_hdr);
+
+	xdp.data = buf + hdr_padded_len;
+	xdp.data_end = xdp.data + (len - vi->hdr_len);
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	switch (act) {
+	case XDP_PASS:
+		return XDP_PASS;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	case XDP_TX:
+	case XDP_ABORTED:
+	case XDP_DROP:
+		return XDP_DROP;
+	}
+}
+
 static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
 {
 	struct sk_buff * skb = buf;
@@ -340,9 +375,19 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   void *buf,
 				   unsigned int len)
 {
+	struct bpf_prog *xdp_prog;
 	struct page *page = buf;
-	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+	struct sk_buff *skb;
+
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		u32 act = do_xdp_prog(vi, xdp_prog, page, 0, len);
+
+		if (act == XDP_DROP)
+			goto err;
+	}
 
+	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 	if (unlikely(!skb))
 		goto err;
 
@@ -366,10 +411,25 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
 	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	struct sk_buff *head_skb, *curr_skb;
+	struct bpf_prog *xdp_prog;
 
-	struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len,
-					       truesize);
-	struct sk_buff *curr_skb = head_skb;
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		u32 act;
+
+		if (num_buf > 1) {
+			bpf_warn_invalid_xdp_buffer();
+			goto err_skb;
+		}
+
+		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
+		if (act == XDP_DROP)
+			goto err_skb;
+	}
+
+	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
+	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
@@ -1328,6 +1388,13 @@ static int virtnet_set_channels(struct net_device *dev,
 	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
 		return -EINVAL;
 
+	/* For now we don't support modifying channels while XDP is loaded
+	 * also when XDP is loaded all RX queues have XDP programs so we only
+	 * need to check a single RX queue.
+	 */
+	if (vi->rq[0].xdp_prog)
+		return -EINVAL;
+
 	get_online_cpus();
 	err = virtnet_set_queues(vi, queue_pairs);
 	if (!err) {
@@ -1454,6 +1521,68 @@ static int virtnet_set_features(struct net_device *netdev,
 	return 0;
 }
 
+static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+	int i;
+
+	if ((dev->features & NETIF_F_LRO) && prog) {
+		netdev_warn(dev, "can't set XDP while LRO is on, disable LRO first\n");
+		return -EINVAL;
+	}
+
+	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
+		netdev_warn(dev, "XDP expects header/data in single page\n");
+		return -EINVAL;
+	}
+
+	if (dev->mtu > PAGE_SIZE) {
+		netdev_warn(dev, "XDP requires MTU less than %lu\n", PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	if (prog) {
+		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
+	}
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		old_prog = rcu_dereference(vi->rq[i].xdp_prog);
+		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+		if (old_prog)
+			bpf_prog_put(old_prog);
+	}
+
+	return 0;
+}
+
+static bool virtnet_xdp_query(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		if (vi->rq[i].xdp_prog)
+			return true;
+	}
+	return false;
+}
+
+static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return virtnet_xdp_set(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = virtnet_xdp_query(dev);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
@@ -1471,6 +1600,7 @@ static int virtnet_set_features(struct net_device *netdev,
 	.ndo_busy_poll		= virtnet_busy_poll,
 #endif
 	.ndo_set_features	= virtnet_set_features,
+	.ndo_xdp		= virtnet_xdp,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
@@ -1527,11 +1657,17 @@ static void virtnet_free_queues(struct virtnet_info *vi)
 
 static void free_receive_bufs(struct virtnet_info *vi)
 {
+	struct bpf_prog *old_prog;
 	int i;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		while (vi->rq[i].pages)
 			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+
+		old_prog = rcu_dereference(vi->rq[i].xdp_prog);
+		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
+		if (old_prog)
+			bpf_prog_put(old_prog);
 	}
 }
 

^ permalink raw reply related

* Re: pull-request: mac80211 2016-11-18
From: David Miller @ 2016-11-18 19:00 UTC (permalink / raw)
  To: johannes-cdvu00un1VgdHxzADdlk8Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20161118075201.30081-1-johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org>

From: Johannes Berg <johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org>
Date: Fri, 18 Nov 2016 08:52:00 +0100

> Due to travel/vacation, this is a bit late, but there aren't
> that many fixes either. Most interesting/important are the
> fixes from Felix and perhaps the scan entry limit.
> 
> Please pull and let me know if there's any problem.

Pulled, thanks a lot Johannes.

^ permalink raw reply

* [PATCH 4/5] virtio_net: add dedicated XDP transmit queues
From: John Fastabend @ 2016-11-18 19:00 UTC (permalink / raw)
  To: tgraf, shm, alexei.starovoitov, daniel, davem
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer
In-Reply-To: <20161118185517.16137.92123.stgit@john-Precision-Tower-5810>

XDP requires using isolated transmit queues to avoid interference
with normal networking stack (BQL, NETDEV_TX_BUSY, etc). This patch
adds a XDP queue per cpu when a XDP program is loaded and does not
expose the queues to the OS via the normal API call to
netif_set_real_num_tx_queues(). This way the stack will never push
an skb to these queues.

However virtio/vhost/qemu implementation only allows for creating
TX/RX queue pairs at this time so creating only TX queues was not
possible. And because the associated RX queues are being created I
went ahead and exposed these to the stack and let the backend use
them. This creates more RX queues visible to the network stack than
TX queues which is worth mentioning but does not cause any issues as
far as I can tell.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/virtio_net.c |   32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 16c257d..631ee07 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -114,6 +114,9 @@ struct virtnet_info {
 	/* # of queue pairs currently used by the driver */
 	u16 curr_queue_pairs;
 
+	/* # of XDP queue pairs currently used by the driver */
+	u16 xdp_queue_pairs;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -1525,7 +1528,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct bpf_prog *old_prog;
-	int i;
+	u16 xdp_qp = 0, curr_qp;
+	int err, i;
 
 	if ((dev->features & NETIF_F_LRO) && prog) {
 		netdev_warn(dev, "can't set XDP while LRO is on, disable LRO first\n");
@@ -1542,12 +1546,34 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 		return -EINVAL;
 	}
 
+	curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
+	if (prog)
+		xdp_qp = num_online_cpus();
+
+	/* XDP requires extra queues for XDP_TX */
+	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
+		netdev_warn(dev, "request %i queues but max is %i\n",
+			    curr_qp + xdp_qp, vi->max_queue_pairs);
+		return -ENOMEM;
+	}
+
+	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
+	if (err) {
+		dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
+		return err;
+	}
+
 	if (prog) {
-		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
-		if (IS_ERR(prog))
+		prog = bpf_prog_add(prog, vi->max_queue_pairs);
+		if (IS_ERR(prog)) {
+			virtnet_set_queues(vi, curr_qp);
 			return PTR_ERR(prog);
+		}
 	}
 
+	vi->xdp_queue_pairs = xdp_qp;
+	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		old_prog = rcu_dereference(vi->rq[i].xdp_prog);
 		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);

^ permalink raw reply related

* [PATCH 5/5] virtio_net: add XDP_TX support
From: John Fastabend @ 2016-11-18 19:01 UTC (permalink / raw)
  To: tgraf, shm, alexei.starovoitov, daniel, davem
  Cc: john.r.fastabend, netdev, bblanco, john.fastabend, brouer
In-Reply-To: <20161118185517.16137.92123.stgit@john-Precision-Tower-5810>

This adds support for the XDP_TX action to virtio_net. When an XDP
program is run and returns the XDP_TX action the virtio_net XDP
implementation will transmit the packet on a TX queue that aligns
with the current CPU that the XDP packet was processed on.

Before sending the packet the header is zeroed.  Also XDP is expected
to handle checksum correctly so no checksum offload  support is
provided.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/virtio_net.c |   57 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 631ee07..4b22938 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -330,12 +330,40 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static void virtnet_xdp_xmit(struct virtnet_info *vi,
+			     unsigned int qnum, struct xdp_buff *xdp)
+{
+	struct send_queue *sq = &vi->sq[qnum];
+	struct virtio_net_hdr_mrg_rxbuf *hdr;
+	unsigned int num_sg, len;
+	void *xdp_sent;
+
+	/* Free up any pending old buffers before queueing new ones. */
+	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		struct page *page = virt_to_head_page(xdp_sent);
+
+		put_page(page);
+	}
+
+	/* Zero header and leave csum up to XDP layers */
+	hdr = xdp->data;
+	memset(hdr, 0, vi->hdr_len);
+	hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+	hdr->hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
+
+	num_sg = 1;
+	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+	virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, xdp->data, GFP_ATOMIC);
+	virtqueue_kick(sq->vq);
+}
+
 static u32 do_xdp_prog(struct virtnet_info *vi,
 		       struct bpf_prog *xdp_prog,
 		       struct page *page, int offset, int len)
 {
 	int hdr_padded_len;
 	struct xdp_buff xdp;
+	unsigned int qp;
 	u32 act;
 	u8 *buf;
 
@@ -353,9 +381,15 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
 	switch (act) {
 	case XDP_PASS:
 		return XDP_PASS;
+	case XDP_TX:
+		qp = vi->curr_queue_pairs -
+			vi->xdp_queue_pairs +
+			smp_processor_id();
+		xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
+		virtnet_xdp_xmit(vi, qp, &xdp);
+		return XDP_TX;
 	default:
 		bpf_warn_invalid_xdp_action(act);
-	case XDP_TX:
 	case XDP_ABORTED:
 	case XDP_DROP:
 		return XDP_DROP;
@@ -386,8 +420,15 @@ static struct sk_buff *receive_big(struct net_device *dev,
 	if (xdp_prog) {
 		u32 act = do_xdp_prog(vi, xdp_prog, page, 0, len);
 
-		if (act == XDP_DROP)
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
 			goto err;
+		}
 	}
 
 	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
@@ -399,6 +440,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 err:
 	dev->stats.rx_dropped++;
 	give_pages(rq, page);
+xdp_xmit:
 	return NULL;
 }
 
@@ -417,6 +459,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct sk_buff *head_skb, *curr_skb;
 	struct bpf_prog *xdp_prog;
 
+	head_skb = NULL;
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
 		u32 act;
@@ -427,8 +470,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		}
 
 		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
-		if (act == XDP_DROP)
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
 			goto err_skb;
+		}
 	}
 
 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
@@ -502,6 +552,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 err_buf:
 	dev->stats.rx_dropped++;
 	dev_kfree_skb(head_skb);
+xdp_xmit:
 	return NULL;
 }
 

^ permalink raw reply related

* Re: [PATCH] netns: fix get_net_ns_by_fd(int pid) typo
From: David Miller @ 2016-11-18 19:02 UTC (permalink / raw)
  To: stefanha; +Cc: netdev
In-Reply-To: <1479462106-28529-1-git-send-email-stefanha@redhat.com>

From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Fri, 18 Nov 2016 09:41:46 +0000

> The argument to get_net_ns_by_fd() is a /proc/$PID/ns/net file
> descriptor not a pid.  Fix the typo.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

Applied.

^ permalink raw reply

* Re: [patch net-next] liquidio CN23XX: bitwise vs logical AND typo
From: David Miller @ 2016-11-18 19:04 UTC (permalink / raw)
  To: dan.carpenter
  Cc: derek.chickles, raghu.vatsavayi, satananda.burla, felix.manlunas,
	netdev, kernel-janitors
In-Reply-To: <20161118114734.GB3281@mwanda>

From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 18 Nov 2016 14:47:35 +0300

> We obviously intended a bitwise AND here, not a logical one.
> 
> Fixes: 8c978d059224 ("liquidio CN23XX: Mailbox support")
> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>

Applied.

^ permalink raw reply

* Re: [PATCH] liquidio CN23XX: check if PENDING bit is clear using logical and
From: David Miller @ 2016-11-18 19:04 UTC (permalink / raw)
  To: colin.king
  Cc: derek.chickles, satananda.burla, felix.manlunas, raghu.vatsavayi,
	netdev, linux-kernel
In-Reply-To: <20161118184532.5282-1-colin.king@canonical.com>

From: Colin King <colin.king@canonical.com>
Date: Fri, 18 Nov 2016 18:45:32 +0000

> From: Colin Ian King <colin.king@canonical.com>
> 
> the mbox state should be bitwise anded rather than logically anded
> with OCTEON_MBOX_STATE_RESPONSE_PENDING. Fix this by using the
> correct & operator instead of &&.
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Dan Carpenter already submitted a fix for this.

^ permalink raw reply

* Re: [PATCH net-next] cxgb4: Allocate Tx queues dynamically
From: David Miller @ 2016-11-18 19:04 UTC (permalink / raw)
  To: atul.gupta-ut6Up61K2wZBDgjK7y7TUQ
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, linux-scsi-u79uwXL29TY76Z2rM5mHXA,
	target-devel-u79uwXL29TY76Z2rM5mHXA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linux-crypto-u79uwXL29TY76Z2rM5mHXA, nab-IzHhD5pYlfBP7FQvKIMDCQ,
	jejb-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	martin.petersen-QHcLZuEGTsvQT0dZR+AlfA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q,
	leedom-ut6Up61K2wZBDgjK7y7TUQ, nirranjan-ut6Up61K2wZBDgjK7y7TUQ,
	varun-ut6Up61K2wZBDgjK7y7TUQ,
	swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW,
	hariprasad-ut6Up61K2wZBDgjK7y7TUQ
In-Reply-To: <1479467260-6509-1-git-send-email-atul.gupta-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>

From: Atul Gupta <atul.gupta-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>
Date: Fri, 18 Nov 2016 16:37:40 +0530

> From: Hariprasad Shenai <hariprasad-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>
> 
> Allocate resources dynamically for Upper layer driver's (ULD) like
> cxgbit, iw_cxgb4, cxgb4i and chcr. The resources allocated include Tx
> queues which are allocated when ULD register with cxgb4 driver and freed
> while un-registering. The Tx queues which are shared by ULD shall be
> allocated by first registering driver and un-allocated by last
> unregistering driver.
> 
> Signed-off-by: Atul Gupta <atul.gupta-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 0/5] XDP for virtio_net
From: John Fastabend @ 2016-11-18 19:06 UTC (permalink / raw)
  To: tgraf, shm, alexei.starovoitov, daniel, davem, Michael S. Tsirkin
  Cc: john.r.fastabend, netdev, bblanco, brouer
In-Reply-To: <20161118185517.16137.92123.stgit@john-Precision-Tower-5810>

On 16-11-18 10:59 AM, John Fastabend wrote:
> This implements virtio_net for the mergeable buffers and big_packet
> modes. I tested this with vhost_net running on qemu and did not see
> any issues.
> 
> There are some restrictions for XDP to be enabled (see patch 3) for
> more details.
> 
>   1. LRO must be off
>   2. MTU must be less than PAGE_SIZE
>   3. queues must be available to dedicate to XDP
>   4. num_bufs received in mergeable buffers must be 1
>   5. big_packet mode must have all data on single page
> 
> Please review any comments/feedback welcome as always.
> 
> Thanks,
> John
> ---
> 

Hi Dave,

Should be obvious but this is for net-next I dropped the tag from my
git-send command.

Also I missed probably the most important person on the CC/TO list.

+Michael Tsirkin.

Thanks,
John

^ permalink raw reply

* Re: [PATCH net] rtnetlink: fix FDB size computation
From: David Miller @ 2016-11-18 19:10 UTC (permalink / raw)
  To: sd; +Cc: netdev, hubert.sokolowski
In-Reply-To: <9bef2a8afa6f8193fcca61ea381b67a52ab878b2.1479477903.git.sd@queasysnail.net>

From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 18 Nov 2016 15:50:39 +0100

> Add missing NDA_VLAN attribute's size.
> 
> Fixes: 1e53d5bb8878 ("net: Pass VLAN ID to rtnl_fdb_notify.")
> Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH] mlxsw: switchib: add MLXSW_PCI dependency
From: David Miller @ 2016-11-18 19:13 UTC (permalink / raw)
  To: arnd; +Cc: jiri, idosch, ivecera, eladr, netdev, linux-kernel
In-Reply-To: <20161118160127.473555-1-arnd@arndb.de>

From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 18 Nov 2016 17:01:14 +0100

> The newly added switchib driver fails to link if MLXSW_PCI=m:
> 
> drivers/net/ethernet/mellanox/mlxsw/mlxsw_switchib.o: In function^Cmlxsw_sib_module_exit':
> switchib.c:(.exit.text+0x8): undefined reference to `mlxsw_pci_driver_unregister'
> switchib.c:(.exit.text+0x10): undefined reference to `mlxsw_pci_driver_unregister'
> drivers/net/ethernet/mellanox/mlxsw/mlxsw_switchib.o: In function `mlxsw_sib_module_init':
> switchib.c:(.init.text+0x28): undefined reference to `mlxsw_pci_driver_register'
> switchib.c:(.init.text+0x38): undefined reference to `mlxsw_pci_driver_register'
> switchib.c:(.init.text+0x48): undefined reference to `mlxsw_pci_driver_unregister'
> 
> The other two such sub-drivers have a dependency, so add the same one
> here. In theory we could allow this driver if MLXSW_PCI is disabled,
> but it's probably not worth it.
> 
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Please resubmit this with a proper fixes tag that identifies the commit that
added the switchib driver.

Thanks.

^ permalink raw reply

* [PATCH net-next]  mlx4: avoid unnecessary dirtying of critical fields
From: Eric Dumazet @ 2016-11-18 20:15 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Tariq Toukan

From: Eric Dumazet <edumazet@google.com>

While stressing a 40Gbit mlx4 NIC with busy polling, I found false
sharing in mlx4 driver that can be easily avoided.

This patch brings an additional 7 % performance improvement in UDP_RR
workload.

1) If we received no frame during one mlx4_en_process_rx_cq()
   invocation, no need to call mlx4_cq_set_ci() and/or dirty ring->cons

2) Do not refill rx buffers if we have plenty of them.
   This avoids false sharing and allows some bulk/batch optimizations.
   Page allocator and its locks will thank us.

Finally, mlx4_en_poll_rx_cq() should not return 0 if it determined
cpu handling NIC IRQ should be changed. We should return budget-1
instead, to not fool net_rx_action() and its netdev_budget.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c |   51 +++++++++++--------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 22f08f9ef464..2112494ff43b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -688,18 +688,23 @@ static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
 	dev_kfree_skb_any(skb);
 }
 
-static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
-				     struct mlx4_en_rx_ring *ring)
+static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
+				      struct mlx4_en_rx_ring *ring)
 {
-	int index = ring->prod & ring->size_mask;
+	u32 missing = ring->actual_size - (ring->prod - ring->cons);
 
-	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
-		if (mlx4_en_prepare_rx_desc(priv, ring, index,
+	/* Try to batch allocations, but not too much. */
+	if (missing < 8)
+		return false;
+	do {
+		if (mlx4_en_prepare_rx_desc(priv, ring,
+					    ring->prod & ring->size_mask,
 					    GFP_ATOMIC | __GFP_COLD))
 			break;
 		ring->prod++;
-		index = ring->prod & ring->size_mask;
-	}
+	} while (--missing);
+
+	return true;
 }
 
 /* When hardware doesn't strip the vlan, we need to calculate the checksum
@@ -1081,15 +1086,20 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
 out:
 	rcu_read_unlock();
-	if (doorbell_pending)
-		mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
-
-	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
-	mlx4_cq_set_ci(&cq->mcq);
-	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
-	ring->cons = cq->mcq.cons_index;
-	mlx4_en_refill_rx_buffers(priv, ring);
-	mlx4_en_update_rx_prod_db(ring);
+
+	if (polled) {
+		if (doorbell_pending)
+			mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
+
+		AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
+		mlx4_cq_set_ci(&cq->mcq);
+		wmb(); /* ensure HW sees CQ consumer before we post new buffers */
+		ring->cons = cq->mcq.cons_index;
+	}
+
+	if (mlx4_en_refill_rx_buffers(priv, ring))
+		mlx4_en_update_rx_prod_db(ring);
+
 	return polled;
 }
 
@@ -1131,10 +1141,13 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 			return budget;
 
 		/* Current cpu is not according to smp_irq_affinity -
-		 * probably affinity changed. need to stop this NAPI
-		 * poll, and restart it on the right CPU
+		 * probably affinity changed. Need to stop this NAPI
+		 * poll, and restart it on the right CPU.
+		 * Try to avoid returning a too small value (like 0),
+		 * to not fool net_rx_action() and its netdev_budget
 		 */
-		done = 0;
+		if (done)
+			done--;
 	}
 	/* Done for now */
 	if (napi_complete_done(napi, done))

^ permalink raw reply related

* Re: [PATCH -next] tcp: make undo_cwnd mandatory for congestion modules
From: Neal Cardwell @ 2016-11-18 20:38 UTC (permalink / raw)
  To: Florian Westphal; +Cc: David Miller, Netdev, Eric Dumazet, Yuchung Cheng
In-Reply-To: <20161118185434.GB18071@breakpoint.cc>

On Fri, Nov 18, 2016 at 1:54 PM, Florian Westphal <fw@strlen.de> wrote:
> David Miller <davem@davemloft.net> wrote:
>> If you really suspect that highspeed et al. need to implement their own
>> undo_cwnd instead of using the default reno fallback, I would really
>> rather that this gets either fixed or explicitly marked as likely wrong
>> (in an "XXX" comment or similar).
>
> Ok, fair enough.  I am not familiar with these algorithms, I will check
> what they're doing in more detail and if absolutely needed resubmit this
> patch with XXX/FIXME/TODO comments added.

BTW, FWIW I really like the idea of making undo_cwnd required. It
simplifies the core code and forces CC modules to think about what
undo should look like for their CC module.

And I suspect you are right that those CC modules have an issue that
should be fixed.

neal

^ permalink raw reply

* Re: [PATCH 4/5] virtio_net: add dedicated XDP transmit queues
From: Jakub Kicinski @ 2016-11-18 21:09 UTC (permalink / raw)
  To: John Fastabend
  Cc: tgraf, shm, alexei.starovoitov, daniel, davem, john.r.fastabend,
	netdev, bblanco, brouer
In-Reply-To: <20161118190041.16137.48399.stgit@john-Precision-Tower-5810>

Looks very cool! :)

On Fri, 18 Nov 2016 11:00:41 -0800, John Fastabend wrote:
> @@ -1542,12 +1546,34 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
>  		return -EINVAL;
>  	}
>  
> +	curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
> +	if (prog)
> +		xdp_qp = num_online_cpus();

Is num_online_cpus() correct here?

^ permalink raw reply

* [PATCH net] l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind()
From: Guillaume Nault @ 2016-11-18 21:13 UTC (permalink / raw)
  To: netdev; +Cc: James Chapman, Chris Elston, Baozeng Ding, Andrey Konovalov

Lock socket before checking the SOCK_ZAPPED flag in l2tp_ip6_bind().
Without lock, a concurrent call could modify the socket flags between
the sock_flag(sk, SOCK_ZAPPED) test and the lock_sock() call. This way,
a socket could be inserted twice in l2tp_ip6_bind_table. Releasing it
would then leave a stale pointer there, generating use-after-free
errors when walking through the list or modifying adjacent entries.

BUG: KASAN: use-after-free in l2tp_ip6_close+0x22e/0x290 at addr ffff8800081b0ed8
Write of size 8 by task syz-executor/10987
CPU: 0 PID: 10987 Comm: syz-executor Not tainted 4.8.0+ #39
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014
 ffff880031d97838 ffffffff829f835b ffff88001b5a1640 ffff8800081b0ec0
 ffff8800081b15a0 ffff8800081b6d20 ffff880031d97860 ffffffff8174d3cc
 ffff880031d978f0 ffff8800081b0e80 ffff88001b5a1640 ffff880031d978e0
Call Trace:
 [<ffffffff829f835b>] dump_stack+0xb3/0x118 lib/dump_stack.c:15
 [<ffffffff8174d3cc>] kasan_object_err+0x1c/0x70 mm/kasan/report.c:156
 [<     inline     >] print_address_description mm/kasan/report.c:194
 [<ffffffff8174d666>] kasan_report_error+0x1f6/0x4d0 mm/kasan/report.c:283
 [<     inline     >] kasan_report mm/kasan/report.c:303
 [<ffffffff8174db7e>] __asan_report_store8_noabort+0x3e/0x40 mm/kasan/report.c:329
 [<     inline     >] __write_once_size ./include/linux/compiler.h:249
 [<     inline     >] __hlist_del ./include/linux/list.h:622
 [<     inline     >] hlist_del_init ./include/linux/list.h:637
 [<ffffffff8579047e>] l2tp_ip6_close+0x22e/0x290 net/l2tp/l2tp_ip6.c:239
 [<ffffffff850b2dfd>] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415
 [<ffffffff851dc5a0>] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422
 [<ffffffff84c4581d>] sock_release+0x8d/0x1d0 net/socket.c:570
 [<ffffffff84c45976>] sock_close+0x16/0x20 net/socket.c:1017
 [<ffffffff817a108c>] __fput+0x28c/0x780 fs/file_table.c:208
 [<ffffffff817a1605>] ____fput+0x15/0x20 fs/file_table.c:244
 [<ffffffff813774f9>] task_work_run+0xf9/0x170
 [<ffffffff81324aae>] do_exit+0x85e/0x2a00
 [<ffffffff81326dc8>] do_group_exit+0x108/0x330
 [<ffffffff81348cf7>] get_signal+0x617/0x17a0 kernel/signal.c:2307
 [<ffffffff811b49af>] do_signal+0x7f/0x18f0
 [<ffffffff810039bf>] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156
 [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:190
 [<ffffffff81006060>] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259
 [<ffffffff85e4d726>] entry_SYSCALL_64_fastpath+0xc4/0xc6
Object at ffff8800081b0ec0, in cache L2TP/IPv6 size: 1448
Allocated:
PID = 10987
 [ 1116.897025] [<ffffffff811ddcb6>] save_stack_trace+0x16/0x20
 [ 1116.897025] [<ffffffff8174c736>] save_stack+0x46/0xd0
 [ 1116.897025] [<ffffffff8174c9ad>] kasan_kmalloc+0xad/0xe0
 [ 1116.897025] [<ffffffff8174cee2>] kasan_slab_alloc+0x12/0x20
 [ 1116.897025] [<     inline     >] slab_post_alloc_hook mm/slab.h:417
 [ 1116.897025] [<     inline     >] slab_alloc_node mm/slub.c:2708
 [ 1116.897025] [<     inline     >] slab_alloc mm/slub.c:2716
 [ 1116.897025] [<ffffffff817476a8>] kmem_cache_alloc+0xc8/0x2b0 mm/slub.c:2721
 [ 1116.897025] [<ffffffff84c4f6a9>] sk_prot_alloc+0x69/0x2b0 net/core/sock.c:1326
 [ 1116.897025] [<ffffffff84c58ac8>] sk_alloc+0x38/0xae0 net/core/sock.c:1388
 [ 1116.897025] [<ffffffff851ddf67>] inet6_create+0x2d7/0x1000 net/ipv6/af_inet6.c:182
 [ 1116.897025] [<ffffffff84c4af7b>] __sock_create+0x37b/0x640 net/socket.c:1153
 [ 1116.897025] [<     inline     >] sock_create net/socket.c:1193
 [ 1116.897025] [<     inline     >] SYSC_socket net/socket.c:1223
 [ 1116.897025] [<ffffffff84c4b46f>] SyS_socket+0xef/0x1b0 net/socket.c:1203
 [ 1116.897025] [<ffffffff85e4d685>] entry_SYSCALL_64_fastpath+0x23/0xc6
Freed:
PID = 10987
 [ 1116.897025] [<ffffffff811ddcb6>] save_stack_trace+0x16/0x20
 [ 1116.897025] [<ffffffff8174c736>] save_stack+0x46/0xd0
 [ 1116.897025] [<ffffffff8174cf61>] kasan_slab_free+0x71/0xb0
 [ 1116.897025] [<     inline     >] slab_free_hook mm/slub.c:1352
 [ 1116.897025] [<     inline     >] slab_free_freelist_hook mm/slub.c:1374
 [ 1116.897025] [<     inline     >] slab_free mm/slub.c:2951
 [ 1116.897025] [<ffffffff81748b28>] kmem_cache_free+0xc8/0x330 mm/slub.c:2973
 [ 1116.897025] [<     inline     >] sk_prot_free net/core/sock.c:1369
 [ 1116.897025] [<ffffffff84c541eb>] __sk_destruct+0x32b/0x4f0 net/core/sock.c:1444
 [ 1116.897025] [<ffffffff84c5aca4>] sk_destruct+0x44/0x80 net/core/sock.c:1452
 [ 1116.897025] [<ffffffff84c5ad33>] __sk_free+0x53/0x220 net/core/sock.c:1460
 [ 1116.897025] [<ffffffff84c5af23>] sk_free+0x23/0x30 net/core/sock.c:1471
 [ 1116.897025] [<ffffffff84c5cb6c>] sk_common_release+0x28c/0x3e0 ./include/net/sock.h:1589
 [ 1116.897025] [<ffffffff8579044e>] l2tp_ip6_close+0x1fe/0x290 net/l2tp/l2tp_ip6.c:243
 [ 1116.897025] [<ffffffff850b2dfd>] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415
 [ 1116.897025] [<ffffffff851dc5a0>] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422
 [ 1116.897025] [<ffffffff84c4581d>] sock_release+0x8d/0x1d0 net/socket.c:570
 [ 1116.897025] [<ffffffff84c45976>] sock_close+0x16/0x20 net/socket.c:1017
 [ 1116.897025] [<ffffffff817a108c>] __fput+0x28c/0x780 fs/file_table.c:208
 [ 1116.897025] [<ffffffff817a1605>] ____fput+0x15/0x20 fs/file_table.c:244
 [ 1116.897025] [<ffffffff813774f9>] task_work_run+0xf9/0x170
 [ 1116.897025] [<ffffffff81324aae>] do_exit+0x85e/0x2a00
 [ 1116.897025] [<ffffffff81326dc8>] do_group_exit+0x108/0x330
 [ 1116.897025] [<ffffffff81348cf7>] get_signal+0x617/0x17a0 kernel/signal.c:2307
 [ 1116.897025] [<ffffffff811b49af>] do_signal+0x7f/0x18f0
 [ 1116.897025] [<ffffffff810039bf>] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156
 [ 1116.897025] [<     inline     >] prepare_exit_to_usermode arch/x86/entry/common.c:190
 [ 1116.897025] [<ffffffff81006060>] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259
 [ 1116.897025] [<ffffffff85e4d726>] entry_SYSCALL_64_fastpath+0xc4/0xc6
Memory state around the buggy address:
 ffff8800081b0d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff8800081b0e00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff8800081b0e80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
                                                    ^
 ffff8800081b0f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff8800081b0f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================

The same issue exists with l2tp_ip_bind() and l2tp_ip_bind_table.

Fixes: c51ce49735c1 ("l2tp: fix oops in L2TP IP sockets for connect() AF_UNSPEC case")
Reported-by: Baozeng Ding <sploving1@gmail.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Baozeng Ding <sploving1@gmail.com>
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
---
 net/l2tp/l2tp_ip.c  | 5 +++--
 net/l2tp/l2tp_ip6.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index fce25af..982f6c4 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	int ret;
 	int chk_addr_ret;
 
-	if (!sock_flag(sk, SOCK_ZAPPED))
-		return -EINVAL;
 	if (addr_len < sizeof(struct sockaddr_l2tpip))
 		return -EINVAL;
 	if (addr->l2tp_family != AF_INET)
@@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	read_unlock_bh(&l2tp_ip_lock);
 
 	lock_sock(sk);
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		goto out;
+
 	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
 		goto out;
 
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index ad3468c..9978d01 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -269,8 +269,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	int addr_type;
 	int err;
 
-	if (!sock_flag(sk, SOCK_ZAPPED))
-		return -EINVAL;
 	if (addr->l2tp_family != AF_INET6)
 		return -EINVAL;
 	if (addr_len < sizeof(*addr))
@@ -296,6 +294,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	lock_sock(sk);
 
 	err = -EINVAL;
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		goto out_unlock;
+
 	if (sk->sk_state != TCP_CLOSE)
 		goto out_unlock;
 
-- 
2.10.2

^ permalink raw reply related

* Potential deadlock BUG in drivers/net/wireless/st/cw1200/sta.c (Linux 4.9)
From: Iago Abal @ 2016-11-18 21:58 UTC (permalink / raw)
  To: Kalle Valo
  Cc: Solomon Peachy, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA

Hi,

With the help of a static bug finder (EBA -
https://github.com/models-team/eba) I have found a potential deadlock
in drivers/net/wireless/st/cw1200/
sta.c. This happens due to a recursive mutex_lock on `priv->conf_mutex'.

If this is indeed a bug, I will be happy to help with a patch.

A quick (not elegant) fix could be to unlock before the call to
`cw1200_do_unjoin' in line 1174, and lock again afterwards. It seems
that `cw1200_join_complete' is always called with `priv->conf_mutex'
held. Another option could be to add a Boolean parameter to
`cw1200_do_unjoin' to choose whether this function should take the
lock itself. Yet another option would be to have a
`__cw1200_do_unjoin' that does not lock, and make `cw1200_do_unjoin' a
wrapper over this that adds the locking; `cw1200_join_complete' would
call `__cw1200_do_unjoin' instead.

Someone who is actually familiar with this code may have a better
proposal though.

The trace is as follows:

1. Function `cw1200_join_complete_work' takes the first lock in line 1189:

    // see https://github.com/torvalds/linux/blob/v4.9-rc5/drivers/net/wireless/st/cw1200/sta.c#L1189
    mutex_lock(& priv->conf_mutex);

2. and subsequently calls `cw1200_join_complete';
3. which calls `cw1200_do_unjoin' in line 1174;
4. and this latter function takes the lock for the second time in line 1387:

    // see https://github.com/torvalds/linux/blob/v4.9-rc5/drivers/net/wireless/st/cw1200/sta.c#L1387
    mutex_lock(& priv->conf_mutex);

Hope it helps!

--
iago

^ permalink raw reply

* [PATCH] net: macb: add check for dma mapping error in start_xmit()
From: Alexey Khoroshilov @ 2016-11-18 22:40 UTC (permalink / raw)
  To: Nicolas Ferre; +Cc: Alexey Khoroshilov, netdev, linux-kernel, ldv-project

at91ether_start_xmit() does not check for dma mapping errors.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
---
 drivers/net/ethernet/cadence/macb.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index b32444a3ed79..533653bd7aec 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2673,6 +2673,12 @@ static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		lp->skb_length = skb->len;
 		lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len,
 							DMA_TO_DEVICE);
+		if (dma_mapping_error(NULL, lp->skb_physaddr)) {
+			dev_kfree_skb_any(skb);
+			dev->stats.tx_dropped++;
+			netdev_err(dev, "%s: DMA mapping error\n", __func__);
+			return NETDEV_TX_OK;
+		}
 
 		/* Set address of the data in the Transmit Address register */
 		macb_writel(lp, TAR, lp->skb_physaddr);
-- 
2.7.4

^ permalink raw reply related

* [RFC 00/10] HFI Virtual Network Interface Controller (VNIC)
From: Vishwanathapura, Niranjana @ 2016-11-18 22:42 UTC (permalink / raw)
  To: Doug Ledford
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Dennis Dalessandro

Intel Omni-Path Host Fabric Interface (HFI) Virtual Network Interface
Controller (VNIC) feature supports Ethernet functionality over Omni-Path
fabric by encapsulating the Ethernet packets between HFI nodes.

The patterns of exchanges of Omni-Path encapsulated Ethernet packets
involves one or more virtual Ethernet switches overlaid on the Omni-Path
fabric topology. A subset of HFI nodes on the Omni-Path fabric are
permitted to exchange encapsulated Ethernet packets across a particular
virtual Ethernet switch. The virtual Ethernet switches are logical
abstractions achieved by configuring the HFI nodes on the fabric for
header generation and processing. In the simplest configuration all HFI
nodes across the fabric exchange encapsulated Ethernet packets over a
single virtual Ethernet switch. A virtual Ethernet switch, is effectively
an independent Ethernet network. The configuration is performed by an
Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM)
application. HFI nodes can have multiple VNICs each connected to a
different virtual Ethernet switch. The below diagram presents a case
of two virtual Ethernet switches with two HFI nodes.

                             +-------------------+
                             |      Subnet/      |
                             |     Ethernet      |
                             |      Manager      |
                             +-------------------+
                                /          /
                              /           /
                            /            /
                          /             /
+-----------------------------+  +------------------------------+
|  Virtual Ethernet Switch    |  |  Virtual Ethernet Switch     |
|  +---------+    +---------+ |  | +---------+    +---------+   |
|  | VPORT   |    |  VPORT  | |  | |  VPORT  |    |  VPORT  |   |
+--+---------+----+---------+-+  +-+---------+----+---------+---+
         |                 \        /                 |
         |                   \    /                   |
         |                     \/                     |
         |                    /  \                    |
         |                  /      \                  |
     +-----------+------------+  +-----------+------------+
     |   VNIC    |    VNIC    |  |    VNIC   |    VNIC    |
     +-----------+------------+  +-----------+------------+
     |          HFI           |  |          HFI           |
     +------------------------+  +------------------------+

Intel HFI VNIC software design is presented in the below diagram.
HFI VNIC functionality has a HW dependent component and a HW
independent component. HFI VNIC Bus module decouples these two
functionalities.

The HW dependent VNIC functionality is part of the HFI1 driver. It
implements the bus operations to do various tasks including HW resource
allocation for VNIC functionality and actual transmission and reception
of encapsulated Ethernet packets over the fabric. It creates a control
device (per HFI) on the HFI VNIC bus for the control plane operations
and VNIC devices for the data plane. Each VNIC device on the HFI VNIC
bus is addressed by the HFI instance, HFI port, and a VNIC port number
on the HFI port.

The HFI VNIC module implements the HW independent VNIC functionality.
It consists of two drivers. The VNIC Ethernet Management Agent (VEMA)
driver binds with the control device on the VNIC bus and interfaces with
the Infiniband MAD stack. It exchanges the management information with
the Ethernet Manager (EM). The VNIC netdev driver binds with the VNIC
devices on the HFI VNIC bus and interfaces with the Linux network stack,
thus providing standard Ethernet network interfaces. The VNIC netdev
driver encapsulates the Ethernet packets with an Omni-Path header before
passing them to the HFI1 driver for transmission. Similarly, it
de-encapsulates the received Omni-Path packets before passing them to the
network stack. For each VNIC interface, the information required for
encapsulation is configured by EM via VEMA MAD interface.


        +-------------------+ +----------------------+
        |                   | |       Linux          |
        |     IB MAD        | |      Network         |
        |                   | |       Stack          |
        +-------------------+ +----------------------+
                 |                       |
                 |                       |
        +--------------------------------------------+
        |                                            |
        |             HFI VNIC Module                |
        |    (HFI VNIC Netdev and EMA drivers)       |
        |                                            |
        +--------------------------------------------+
                             |
                             |
        +--------------------------------------------+
        |              HFI VNIC Bus                  |
        +--------------------------------------------+
                             |
                             |
        +--------------------------------------------+
        |                                            |
        |      HFI1 Driver with VNIC support         |
        |                                            |
        +--------------------------------------------+

Vishwanathapura, Niranjana (10):
  IB/hfi-vnic: Virtual Network Interface Controller (VNIC) documentation
  IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver
  IB/hfi-vnic: Virtual Network Interface Controller (VNIC) netdev driver
  IB/hfi-vnic: VNIC Ethernet Management (EM) structure definitions
  IB/hfi-vnic: VNIC statistics support
  IB/hfi-vnic: VNIC MAC table support
  IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) interface
  IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) driver
  IB/hfi1: Virtual Network Interface Controller (VNIC) support
  IB/hfi1: VNIC SDMA support

 Documentation/infiniband/hfi_vnic.txt              |   97 ++
 MAINTAINERS                                        |    7 +
 drivers/infiniband/Kconfig                         |    2 +
 drivers/infiniband/hw/hfi1/Kconfig                 |    2 +-
 drivers/infiniband/hw/hfi1/Makefile                |    3 +-
 drivers/infiniband/hw/hfi1/aspm.h                  |   13 +-
 drivers/infiniband/hw/hfi1/chip.c                  |  270 +++++-
 drivers/infiniband/hw/hfi1/chip.h                  |    2 +
 drivers/infiniband/hw/hfi1/debugfs.c               |    6 +-
 drivers/infiniband/hw/hfi1/driver.c                |   78 +-
 drivers/infiniband/hw/hfi1/file_ops.c              |   25 +-
 drivers/infiniband/hw/hfi1/hfi.h                   |   51 +-
 drivers/infiniband/hw/hfi1/init.c                  |   44 +-
 drivers/infiniband/hw/hfi1/mad.c                   |    8 +-
 drivers/infiniband/hw/hfi1/pio.c                   |   17 +
 drivers/infiniband/hw/hfi1/pio.h                   |    6 +
 drivers/infiniband/hw/hfi1/sysfs.c                 |    2 +-
 drivers/infiniband/hw/hfi1/user_exp_rcv.c          |    6 +-
 drivers/infiniband/hw/hfi1/user_pages.c            |    3 +-
 drivers/infiniband/hw/hfi1/vnic.h                  |  183 ++++
 drivers/infiniband/hw/hfi1/vnic_device.c           |  168 ++++
 drivers/infiniband/hw/hfi1/vnic_main.c             |  573 +++++++++++
 drivers/infiniband/hw/hfi1/vnic_sdma.c             |  320 ++++++
 drivers/infiniband/sw/Makefile                     |    1 +
 drivers/infiniband/sw/intel/vnic/Makefile          |    2 +
 drivers/infiniband/sw/intel/vnic/hfi_vnic/Kconfig  |    8 +
 drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile |    8 +
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_encap.c        |  490 ++++++++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h        |  510 ++++++++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_ethtool.c      |  212 ++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h     |  422 ++++++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c       |  883 +++++++++++++++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c         | 1024 ++++++++++++++++++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c   |  385 ++++++++
 .../infiniband/sw/intel/vnic/hfi_vnic_bus/Kconfig  |    8 +
 .../infiniband/sw/intel/vnic/hfi_vnic_bus/Makefile |    5 +
 .../sw/intel/vnic/hfi_vnic_bus/hfi_vnic_bus.c      |  366 +++++++
 .../infiniband/sw/intel/vnic/include/hfi_vnic.h    |  282 ++++++
 include/rdma/opa_port_info.h                       |    2 +-
 39 files changed, 6395 insertions(+), 99 deletions(-)
 create mode 100644 Documentation/infiniband/hfi_vnic.txt
 create mode 100644 drivers/infiniband/hw/hfi1/vnic.h
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_device.c
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_main.c
 create mode 100644 drivers/infiniband/hw/hfi1/vnic_sdma.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/Makefile
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/Kconfig
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_ethtool.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Kconfig
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Makefile
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/hfi_vnic_bus.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/include/hfi_vnic.h

-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [RFC 01/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) documentation
From: Vishwanathapura, Niranjana @ 2016-11-18 22:42 UTC (permalink / raw)
  To: Doug Ledford
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Dennis Dalessandro, Niranjana Vishwanathapura
In-Reply-To: <1479508938-63799-1-git-send-email-niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Add HFI VNIC design document explaining the VNIC architecture and the
driver design.

Change-Id: I7baa39444579dc582fe1e49b86e9cfc71f0a41a4
Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 Documentation/infiniband/hfi_vnic.txt | 97 +++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 Documentation/infiniband/hfi_vnic.txt

diff --git a/Documentation/infiniband/hfi_vnic.txt b/Documentation/infiniband/hfi_vnic.txt
new file mode 100644
index 0000000..3501288
--- /dev/null
+++ b/Documentation/infiniband/hfi_vnic.txt
@@ -0,0 +1,97 @@
+Intel Omni-Path Host Fabric Interface (HFI) Virtual Network Interface
+Controller (VNIC) feature supports Ethernet functionality over Omni-Path
+fabric by encapsulating the Ethernet packets between HFI nodes.
+
+The patterns of exchanges of Omni-Path encapsulated Ethernet packets
+involves one or more virtual Ethernet switches overlaid on the Omni-Path
+fabric topology. A subset of HFI nodes on the Omni-Path fabric are
+permitted to exchange encapsulated Ethernet packets across a particular
+virtual Ethernet switch. The virtual Ethernet switches are logical
+abstractions achieved by configuring the HFI nodes on the fabric for
+header generation and processing. In the simplest configuration all HFI
+nodes across the fabric exchange encapsulated Ethernet packets over a
+single virtual Ethernet switch. A virtual Ethernet switch, is effectively
+an independent Ethernet network. The configuration is performed by an
+Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM)
+application. HFI nodes can have multiple VNICs each connected to a
+different virtual Ethernet switch. The below diagram presents a case
+of two virtual Ethernet switches with two HFI nodes.
+
+                             +-------------------+
+                             |      Subnet/      |
+                             |     Ethernet      |
+                             |      Manager      |
+                             +-------------------+
+                                /          /
+                              /           /
+                            /            /
+                          /             /
++-----------------------------+  +------------------------------+
+|  Virtual Ethernet Switch    |  |  Virtual Ethernet Switch     |
+|  +---------+    +---------+ |  | +---------+    +---------+   |
+|  | VPORT   |    |  VPORT  | |  | |  VPORT  |    |  VPORT  |   |
++--+---------+----+---------+-+  +-+---------+----+---------+---+
+         |                 \        /                 |
+         |                   \    /                   |
+         |                     \/                     |
+         |                    /  \                    |
+         |                  /      \                  |
+     +-----------+------------+  +-----------+------------+
+     |   VNIC    |    VNIC    |  |    VNIC   |    VNIC    |
+     +-----------+------------+  +-----------+------------+
+     |          HFI           |  |          HFI           |
+     +------------------------+  +------------------------+
+
+Intel HFI VNIC software design is presented in the below diagram.
+HFI VNIC functionality has a HW dependent component and a HW
+independent component. HFI VNIC Bus module decouples these two
+functionalities.
+
+The HW dependent VNIC functionality is part of the HFI1 driver. It
+implements the bus operations to do various tasks including HW resource
+allocation for VNIC functionality and actual transmission and reception
+of encapsulated Ethernet packets over the fabric. It creates a control
+device (per HFI) on the HFI VNIC bus for the control plane operations
+and VNIC devices for the data plane. Each VNIC device on the HFI VNIC
+bus is addressed by the HFI instance, HFI port, and a VNIC port number
+on the HFI port.
+
+The HFI VNIC module implements the HW independent VNIC functionality.
+It consists of two drivers. The VNIC Ethernet Management Agent (VEMA)
+driver binds with the control device on the VNIC bus and interfaces with
+the Infiniband MAD stack. It exchanges the management information with
+the Ethernet Manager (EM). The VNIC netdev driver binds with the VNIC
+devices on the HFI VNIC bus and interfaces with the Linux network stack,
+thus providing standard Ethernet network interfaces. The VNIC netdev
+driver encapsulates the Ethernet packets with an Omni-Path header before
+passing them to the HFI1 driver for transmission. Similarly, it
+de-encapsulates the received Omni-Path packets before passing them to the
+network stack. For each VNIC interface, the information required for
+encapsulation is configured by EM via VEMA MAD interface.
+
+
+        +-------------------+ +----------------------+
+        |                   | |       Linux          |
+        |     IB MAD        | |      Network         |
+        |                   | |       Stack          |
+        +-------------------+ +----------------------+
+                 |                       |
+                 |                       |
+        +--------------------------------------------+
+        |                                            |
+        |             HFI VNIC Module                |
+        |    (HFI VNIC Netdev and EMA drivers)       |
+        |                                            |
+        +--------------------------------------------+
+                             |
+                             |
+        +--------------------------------------------+
+        |              HFI VNIC Bus                  |
+        +--------------------------------------------+
+                             |
+                             |
+        +--------------------------------------------+
+        |                                            |
+        |      HFI1 Driver with VNIC support         |
+        |                                            |
+        +--------------------------------------------+
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver
From: Vishwanathapura, Niranjana @ 2016-11-18 22:42 UTC (permalink / raw)
  To: Doug Ledford
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Dennis Dalessandro, Niranjana Vishwanathapura
In-Reply-To: <1479508938-63799-1-git-send-email-niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

HFI VNIC bus driver interfaces between hardware independent VNIC
functionality and the hardware dependent VNIC functionality.
Support creation of Intel HFI VNIC devices and binding with Intel
HFI VNIC drivers. Define the bus operations the HFI VNIC device
should support.

Change-Id: I91f65d0957d4866b133ee2b6b5246c49cbc0ba69
Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 MAINTAINERS                                        |   7 +
 drivers/infiniband/Kconfig                         |   1 +
 drivers/infiniband/sw/Makefile                     |   1 +
 drivers/infiniband/sw/intel/vnic/Makefile          |   1 +
 .../infiniband/sw/intel/vnic/hfi_vnic_bus/Kconfig  |   8 +
 .../infiniband/sw/intel/vnic/hfi_vnic_bus/Makefile |   5 +
 .../sw/intel/vnic/hfi_vnic_bus/hfi_vnic_bus.c      | 366 +++++++++++++++++++++
 .../infiniband/sw/intel/vnic/include/hfi_vnic.h    | 282 ++++++++++++++++
 8 files changed, 671 insertions(+)
 create mode 100644 drivers/infiniband/sw/intel/vnic/Makefile
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Kconfig
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Makefile
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/hfi_vnic_bus.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/include/hfi_vnic.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3d838cf..8c37878 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5628,6 +5628,13 @@ F:	drivers/block/cciss*
 F:	include/linux/cciss_ioctl.h
 F:	include/uapi/linux/cciss_ioctl.h
 
+HFI-VNIC DRIVER
+M:	Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
+M:	Niranjana Vishwanathapura <niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
+L:	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
+S:	Supported
+F:	drivers/infiniband/sw/intel/vnic
+
 HFI1 DRIVER
 M:	Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
 M:	Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index fb3fb89..7fe9095 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -84,6 +84,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
+source "drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Kconfig"
 source "drivers/infiniband/sw/rdmavt/Kconfig"
 source "drivers/infiniband/sw/rxe/Kconfig"
 
diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile
index 8b095b2..4fa6058 100644
--- a/drivers/infiniband/sw/Makefile
+++ b/drivers/infiniband/sw/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_INFINIBAND_RDMAVT)		+= rdmavt/
 obj-$(CONFIG_RDMA_RXE)			+= rxe/
+obj-$(CONFIG_INFINIBAND)		+= intel/vnic/
diff --git a/drivers/infiniband/sw/intel/vnic/Makefile b/drivers/infiniband/sw/intel/vnic/Makefile
new file mode 100644
index 0000000..083e55b
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_HFI_VNIC_BUS)		+= hfi_vnic_bus/
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Kconfig b/drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Kconfig
new file mode 100644
index 0000000..85952d6
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Kconfig
@@ -0,0 +1,8 @@
+config HFI_VNIC_BUS
+	tristate "Intel HFI VNIC bus support"
+	depends on X86_64
+	---help---
+	This is HFI Virtual Network Interface Controller (VNIC) Bus driver
+	for binding Intel HFI VNIC devices and drivers. It separates the
+	hardware independent VNIC functionaity from the hw dependent. It
+	provides APIs to register and unregister VNIC devices and drivers.
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Makefile b/drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Makefile
new file mode 100644
index 0000000..5fac098
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Makefile
@@ -0,0 +1,5 @@
+# Makefile - Intel HFI Virtual Network Controller bus driver
+# Copyright(c) 2016, Intel Corporation.
+#
+ccflags-y += -I$(src)/../include
+obj-$(CONFIG_HFI_VNIC_BUS) += hfi_vnic_bus.o
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/hfi_vnic_bus.c b/drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/hfi_vnic_bus.c
new file mode 100644
index 0000000..5455fc7
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/hfi_vnic_bus.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI Virtual Network Controller bus driver
+ */
+#include <linux/idr.h>
+#include <linux/module.h>
+
+#include "hfi_vnic.h"
+
+#define HFI_VNIC_ID_HFI_SHFT     16
+#define HFI_VNIC_ID_PORT_SHFT     8
+#define HFI_VNIC_GET_ID(hfi, port, vport)   (((hfi) << HFI_VNIC_ID_HFI_SHFT) | \
+				((port) << HFI_VNIC_ID_PORT_SHFT) | (vport))
+
+/* Unique numbering for vnic control devices */
+static struct ida hfi_vnic_ctrl_ida;
+
+/* Unique numbering for vnic devices */
+static struct idr hfi_vnic_idr;
+
+/* hfi vnic device type */
+static const struct device_type hfi_vnic_dev_type = {
+	.name = "hfi_vnic_dev",
+};
+
+/* hfi vnic control device type */
+static const struct device_type hfi_vnic_ctrl_dev_type = {
+	.name = "hfi_vnic_ctrl_dev",
+};
+
+static inline enum hfi_vnic_drv_type
+hfi_vnic_get_drv_type(struct device_driver *drv)
+{
+	return container_of(drv, struct hfi_vnic_drvwrap, driver)->type;
+}
+
+/* hfi_vnic_match_device - device, driver match function */
+static int hfi_vnic_match_device(struct device *dev, struct device_driver *drv)
+{
+	enum hfi_vnic_drv_type drv_type = hfi_vnic_get_drv_type(drv);
+
+	if ((dev->type == &hfi_vnic_dev_type) && (drv_type == HFI_VNIC_DRV))
+		return 1;
+	else if ((dev->type == &hfi_vnic_ctrl_dev_type) &&
+		 (drv_type == HFI_VNIC_CTRL_DRV))
+		return 1;
+
+	return 0;
+}
+
+/* hfi vnic bus structure */
+static struct bus_type hfi_vnic_bus = {
+	.name = "hfi_vnic_bus",
+	.match = hfi_vnic_match_device,
+};
+
+static void hfi_vnic_dev_release(struct device *dev)
+{
+	struct hfi_vnic_device *vdev = container_of(dev,
+					    struct hfi_vnic_device, dev);
+	kfree(vdev);
+}
+
+/**
+ * hfi_vnic_get_dev - return hfi vnic device
+ * @cdev: pointer to the control device
+ * @port_num: hfi port number
+ * @vport_num: vnic port number
+ *
+ * Return pointer to the vnic device on the given hfi instance
+ * (control device) and hfi port, with specified vnic port number.
+ *
+ */
+struct hfi_vnic_device *hfi_vnic_get_dev(struct hfi_vnic_ctrl_device *cdev,
+					 u8 port_num, u8 vport_num)
+{
+	int id;
+
+	if (vport_num == HFI_MAX_NUM_VNICS)
+		return NULL;
+
+	id = HFI_VNIC_GET_ID(cdev->id, port_num, vport_num);
+	return idr_find(&hfi_vnic_idr, id);
+}
+EXPORT_SYMBOL(hfi_vnic_get_dev);
+
+/**
+ * hfi_vnic_device_register - register hfi vnic device on the hfi vnic bus
+ * @cdev: pointer to the control device
+ * @port_num: hfi port number
+ * @vport_num: vnic port number
+ * @priv: pointer to the device private data
+ * @ops: pointer to the bus operations
+ * @hfi_info: hfi device hw specific information
+ *
+ * A hfi vnic device is created and registered on the hfi vnic bus.
+ * The device is assigned a unique id based on the hfi instance (id of the
+ * control device associated with it), hfi port number and the vnic port
+ * number on the given hfi port.
+ */
+struct hfi_vnic_device *hfi_vnic_device_register(
+					 struct hfi_vnic_ctrl_device *cdev,
+					 u8 port_num, u8 vport_num, void *priv,
+					 struct hfi_vnic_ops *ops,
+					 struct hfi_vnic_info hfi_info)
+{
+	struct hfi_vnic_device *vdev;
+	int id, rc;
+
+	if (vport_num == HFI_MAX_NUM_VNICS)
+		return ERR_PTR(-EINVAL);
+
+	id = HFI_VNIC_GET_ID(cdev->id, port_num, vport_num);
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return ERR_PTR(-ENOMEM);
+
+	rc = idr_alloc(&hfi_vnic_idr, vdev, id, (id + 1), GFP_NOWAIT);
+	if (rc < 0) {
+		kfree(vdev);
+		goto idr_err;
+	}
+
+	vdev->dev.release = hfi_vnic_dev_release;
+	vdev->dev.bus = &hfi_vnic_bus;
+
+	vdev->id = id;
+	vdev->cdev = cdev;
+	vdev->bus_ops = ops;
+	vdev->hfi_priv = priv;
+	vdev->hfi_info = hfi_info;
+	vdev->port_num = port_num;
+	vdev->vport_num = vport_num;
+	vdev->dev.parent = cdev->dev.parent;
+
+	vdev->dev.type = &hfi_vnic_dev_type;
+	dev_set_name(&vdev->dev, "hfi_vnic_%02x.%02x.%02x",
+		     cdev->id, port_num, vport_num);
+
+	rc = device_register(&vdev->dev);
+	if (rc) {
+		put_device(&vdev->dev);
+		goto dev_err;
+	}
+
+	dev_info(&vdev->dev, "added vport\n");
+	return vdev;
+
+dev_err:
+	idr_remove(&hfi_vnic_idr, id);
+idr_err:
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(hfi_vnic_device_register);
+
+/**
+ * hfi_vnic_device_unregister - remove hfi vnic device from the hfi vnic bus
+ * @vdev: pointer to hfi vnic device
+ */
+void hfi_vnic_device_unregister(struct hfi_vnic_device *vdev)
+{
+	int id = vdev->id;
+
+	dev_info(&vdev->dev, "removing vport\n");
+	device_unregister(&vdev->dev);
+	idr_remove(&hfi_vnic_idr, id);
+}
+EXPORT_SYMBOL(hfi_vnic_device_unregister);
+
+/**
+ * hfi_vnic_driver_register - register hfi vnic driver on the hfi vnic bus
+ * @drv: pointer to hfi vnic driver
+ */
+int hfi_vnic_driver_register(struct hfi_vnic_driver *drv)
+{
+	drv->drvwrap.driver.bus = &hfi_vnic_bus;
+	return driver_register(&drv->drvwrap.driver);
+}
+EXPORT_SYMBOL(hfi_vnic_driver_register);
+
+/**
+ * hfi_vnic_driver_unregister - remove hfi vnic driver from the hfi vnic bus
+ * @drv: pointer to hfi vnic driver
+ */
+void hfi_vnic_driver_unregister(struct hfi_vnic_driver *drv)
+{
+	driver_unregister(&drv->drvwrap.driver);
+}
+EXPORT_SYMBOL(hfi_vnic_driver_unregister);
+
+static void hfi_vnic_ctrl_dev_release(struct device *dev)
+{
+	struct hfi_vnic_ctrl_device *cdev = container_of(dev,
+					    struct hfi_vnic_ctrl_device, dev);
+	kfree(cdev);
+}
+
+/**
+ * hfi_vnic_ctrl_device_register - register hfi vnic control device
+ * @parent: pointer to the parent device
+ * @ibdev: pointer to the ib device
+ * @num_ports: number of hfi ports
+ * @priv: pointer to the device private data
+ * @ops: pointer to the bus operations
+ *
+ * A hfi vnic control device is created and registered on the hfi vnic bus.
+ * The device is assigned a unique id.
+ */
+struct hfi_vnic_ctrl_device *hfi_vnic_ctrl_device_register(
+					   struct device *parent,
+					   struct ib_device *ibdev,
+					   u8 num_ports, void *priv,
+					   struct hfi_vnic_ctrl_ops *ops)
+{
+	struct hfi_vnic_ctrl_device *cdev;
+	int rc;
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return ERR_PTR(-ENOMEM);
+
+	rc = ida_simple_get(&hfi_vnic_ctrl_ida, 0, 0, GFP_KERNEL);
+	if (rc < 0) {
+		kfree(cdev);
+		goto ida_err;
+	}
+
+	cdev->id = rc;
+	cdev->dev.release = hfi_vnic_ctrl_dev_release;
+	cdev->dev.bus = &hfi_vnic_bus;
+	cdev->dev.parent = parent;
+
+	cdev->ibdev = ibdev;
+	cdev->num_ports = num_ports;
+	cdev->ctrl_ops = ops;
+	cdev->hfi_priv = priv;
+	dev_set_name(&cdev->dev, "hfi_vnic_ctrl_%02x", cdev->id);
+	cdev->dev.type = &hfi_vnic_ctrl_dev_type;
+	rc = device_register(&cdev->dev);
+	if (rc) {
+		put_device(&cdev->dev);
+		goto dev_err;
+	}
+
+	dev_info(&cdev->dev, "added vnic control port\n");
+	return cdev;
+
+dev_err:
+	ida_simple_remove(&hfi_vnic_ctrl_ida, cdev->id);
+ida_err:
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(hfi_vnic_ctrl_device_register);
+
+/**
+ * hfi_vnic_ctrl_device_unregister - remove hfi vnic control device
+ * @cdev: pointer to hfi vnic control device
+ */
+void hfi_vnic_ctrl_device_unregister(struct hfi_vnic_ctrl_device *cdev)
+{
+	int id = cdev->id;
+
+	dev_info(&cdev->dev, "removing vnic control port\n");
+	device_unregister(&cdev->dev);
+	ida_simple_remove(&hfi_vnic_ctrl_ida, id);
+}
+EXPORT_SYMBOL(hfi_vnic_ctrl_device_unregister);
+
+/**
+ * hfi_vnic_ctrl_driver_register - register hfi vnic control driver
+ * @drv: pointer to hfi vnic control driver
+ */
+int hfi_vnic_ctrl_driver_register(struct hfi_vnic_ctrl_driver *drv)
+{
+	drv->drvwrap.driver.bus = &hfi_vnic_bus;
+	return driver_register(&drv->drvwrap.driver);
+}
+EXPORT_SYMBOL(hfi_vnic_ctrl_driver_register);
+
+/**
+ * hfi_vnic_ctrl_driver_unregister - remove hfi vnic control driver
+ * @drv: pointer to hfi vnic control driver
+ */
+void hfi_vnic_ctrl_driver_unregister(struct hfi_vnic_ctrl_driver *drv)
+{
+	driver_unregister(&drv->drvwrap.driver);
+}
+EXPORT_SYMBOL(hfi_vnic_ctrl_driver_unregister);
+
+/* hfi_vnic_bus_init - initialize the hfi vnic bus drvier */
+static int hfi_vnic_bus_init(void)
+{
+	int rc;
+
+	ida_init(&hfi_vnic_ctrl_ida);
+	idr_init(&hfi_vnic_idr);
+
+	rc = bus_register(&hfi_vnic_bus);
+	if (rc) {
+		pr_err("hfi vnic bus init failed %d\n", rc);
+		idr_destroy(&hfi_vnic_idr);
+		ida_destroy(&hfi_vnic_ctrl_ida);
+	}
+
+	return rc;
+}
+postcore_initcall(hfi_vnic_bus_init);
+
+/* hfi_vnic_bus_deinit - remove the hfi vnic bus drvier */
+static void hfi_vnic_bus_deinit(void)
+{
+	bus_unregister(&hfi_vnic_bus);
+	idr_destroy(&hfi_vnic_idr);
+	ida_destroy(&hfi_vnic_ctrl_ida);
+}
+module_exit(hfi_vnic_bus_deinit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel HFI Virtual Network Controller bus driver");
diff --git a/drivers/infiniband/sw/intel/vnic/include/hfi_vnic.h b/drivers/infiniband/sw/intel/vnic/include/hfi_vnic.h
new file mode 100644
index 0000000..a5a723b
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/include/hfi_vnic.h
@@ -0,0 +1,282 @@
+#ifndef _HFI_VNIC_H
+#define _HFI_VNIC_H
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI Virtual Network Interface Controller (VNIC)
+ * driver interfaces
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+
+/* Maximum possible number of VNICs */
+#define HFI_MAX_NUM_VNICS     255
+
+#define HFI_VNIC_MAX_QUEUE    16
+
+#define HFI_VNIC_CAP_SG   BIT(0)
+
+enum hfi_vnic_drv_type {
+	HFI_VNIC_DRV,          /* VNIC NETDEV driver */
+	HFI_VNIC_CTRL_DRV      /* VNIC control driver */
+};
+
+enum {
+	/* Packet received on queue 0 */
+	HFI_VNIC_EVT_RX0,
+	/* Tx wakeup notification on queue 0 */
+	HFI_VNIC_EVT_TX0
+		= HFI_VNIC_EVT_RX0 + HFI_VNIC_MAX_QUEUE,
+	HFI_VNIC_NUM_EVTS
+		= HFI_VNIC_EVT_TX0 + HFI_VNIC_MAX_QUEUE,
+};
+
+struct hfi_vnic_device;
+struct hfi_vnic_ctrl_device;
+
+typedef void (*hfi_vnic_evt_cb_fn)(struct hfi_vnic_device *vdev, u8 evt);
+
+/**
+ * struct hfi_vnic_ops - HFI HW specific VNIC functions
+ * @init: Initialize the hfi device
+ * @deinit: De-initialize the hfi device
+ * @open: Open vnic hfi device for vnic traffic
+ * @close: Close vnic hfi device for vnic traffic
+ * @put_skb: transmit an skb
+ * @get_skb: receive an skb
+ * @get_read_avail: return number of available to read
+ * @get_write_avail: return whether write space is available or not
+ * @select_queue: select tx queue
+ * @config_notify: enable/disable notification
+ */
+struct hfi_vnic_ops {
+	int (*init)(struct hfi_vnic_device *vdev);
+	void (*deinit)(struct hfi_vnic_device *vdev);
+	int (*open)(struct hfi_vnic_device *vdev,
+		    hfi_vnic_evt_cb_fn cb);
+	void (*close)(struct hfi_vnic_device *vdev);
+	int (*put_skb)(struct hfi_vnic_device *vdev,
+		       u8 q_idx, struct sk_buff *skb);
+	struct sk_buff *(*get_skb)(struct hfi_vnic_device *vdev, u8 q_idx);
+	u16 (*get_read_avail)(struct hfi_vnic_device *vdev, u8 q_idx);
+	bool (*get_write_avail)(struct hfi_vnic_device *vdev, u8 q_idx);
+	u8 (*select_queue)(struct hfi_vnic_device *vdev, u8 vl, u8 entropy);
+	void (*config_notify)(struct hfi_vnic_device *vdev,
+			      u8 evt, bool enable);
+};
+
+/**
+ * struct hfi_vnic_ctrl_ops - HFI HW specific VNIC control functions
+ * @add_vport: add a vnic port device
+ * @rem_vport: remove a vnic port device
+ */
+struct hfi_vnic_ctrl_ops {
+	int (*add_vport)(struct hfi_vnic_ctrl_device *cdev,
+			 u8 port_num, u8 vport_num);
+	void (*rem_vport)(struct hfi_vnic_ctrl_device *cdev,
+			  u8 port_num, u8 vport_num);
+};
+
+/**
+ * struct hfi_vnic_stats - HFI HW specific statistics
+ * @rx_fifo_errors: receive packets dropped due to fifo full
+ * @tx_fifo_errors: transmit packets dropped due to fifo full
+ * @rx_missed_errors: receive packets missed due to no memory
+ * @tx_carrier_errors: packet transmits when STL link is down
+ * @rx_bad_veswid: receive packets with invalid vesw id
+ * @rx_logic_errors: receive packets dropped due to other errors
+ * @tx_logic_errors: transmit packets dropped due to other errors
+ *
+ * This structure holds any statistics information that is
+ * collected by HW specific driver layer.
+ */
+struct hfi_vnic_stats {
+	u64  rx_fifo_errors;
+	u64  tx_fifo_errors;
+	u64  rx_missed_errors;
+	u64  tx_carrier_errors;
+	u64  rx_bad_veswid;
+	u64  rx_logic_errors;
+	u64  tx_logic_errors;
+};
+
+/**
+ * struct hfi_vnic_info - HFI HW specific VNIC information
+ * @cap: capabilities
+ * @num_rx_q: number of receive queues
+ * @num_tx_q: number of transmit queues
+ */
+struct hfi_vnic_info {
+	u32  cap;
+	u8   num_rx_q;
+	u8   num_tx_q;
+};
+
+/**
+ * struct hfi_vnic_device - HFI virtual NIC device
+ * @dev: device
+ * @id: unique hfi vnic device instance
+ * @vesw_id: virtual ethernet switch id
+ * @netdev: pointer to associated netdev
+ * @port_num: hfi port instance
+ * @vport_num: vnic port instance on the hfi port
+ * @cdev: vnic control device pointer
+ * @bus_ops: hfi vnic bus operations
+ * @hfi_priv: hfi private data pointer
+ * @hfi_info: hfi information
+ * @hfi_stats: per queue hfi statistics
+ */
+struct hfi_vnic_device {
+	struct device                dev;
+	int                          id;
+	u16                          vesw_id;
+	struct net_device           *netdev;
+	u8                           port_num;
+	u8                           vport_num;
+
+	struct hfi_vnic_ctrl_device *cdev;
+	struct hfi_vnic_ops         *bus_ops;
+	void                        *hfi_priv;
+	struct hfi_vnic_info         hfi_info;
+	struct hfi_vnic_stats        hfi_stats[HFI_VNIC_MAX_QUEUE];
+};
+
+/**
+ * struct hfi_vnic_ctrl_device - HFI virtual NIC control device
+ * @dev: device
+ * @id: unique hfi vnic control device instance
+ * @ibdev: pointer to ib device
+ * @num_ports: number of hfi ports
+ * @ctrl_ops: hfi vnic control operations
+ * @hfi_priv: hfi private data pointer
+ */
+struct hfi_vnic_ctrl_device {
+	struct device             dev;
+	int                       id;
+
+	struct ib_device         *ibdev;
+	u8                        num_ports;
+
+	struct hfi_vnic_ctrl_ops *ctrl_ops;
+	void                     *hfi_priv;
+};
+
+/**
+ * struct hfi_vnic_drvwrap - HFI vnic driver wrapper
+ * @type: driver type
+ * @driver: device driver
+ */
+struct hfi_vnic_drvwrap {
+	enum hfi_vnic_drv_type  type;
+	struct device_driver    driver;
+};
+
+/**
+ * struct hfi_vnic_driver - HFI virtual NIC driver
+ * @drvwrap: driver wrapper
+ */
+struct hfi_vnic_driver {
+	struct hfi_vnic_drvwrap  drvwrap;
+};
+
+/**
+ * struct hfi_vnic_ctrl_driver -  HFI virtual NIC Control driver
+ * @drvwrap: driver wrapper
+ */
+struct hfi_vnic_ctrl_driver {
+	struct hfi_vnic_drvwrap  drvwrap;
+};
+
+/* VNIC device interface functions */
+int hfi_vnic_driver_register(struct hfi_vnic_driver *drv);
+void hfi_vnic_driver_unregister(struct hfi_vnic_driver *drv);
+
+struct hfi_vnic_device *hfi_vnic_device_register(
+					 struct hfi_vnic_ctrl_device *cdev,
+					 u8 port_num, u8 vport_num, void *priv,
+					 struct hfi_vnic_ops *ops,
+					 struct hfi_vnic_info hfi_info);
+void hfi_vnic_device_unregister(struct hfi_vnic_device *vdev);
+
+struct hfi_vnic_device *hfi_vnic_get_dev(struct hfi_vnic_ctrl_device *cdev,
+					 u8 port_num, u8 vport_num);
+
+/* VNIC control device interface functions */
+int hfi_vnic_ctrl_driver_register(struct hfi_vnic_ctrl_driver *drv);
+void hfi_vnic_ctrl_driver_unregister(struct hfi_vnic_ctrl_driver *drv);
+
+struct hfi_vnic_ctrl_device *hfi_vnic_ctrl_device_register(
+					   struct device *parent,
+					   struct ib_device *ibdev,
+					   u8 num_ports, void *priv,
+					   struct hfi_vnic_ctrl_ops *ops);
+void hfi_vnic_ctrl_device_unregister(struct hfi_vnic_ctrl_device *cdev);
+
+/* hfi_vdev_get - Get module and vdev reference counts */
+static inline int hfi_vdev_get(struct hfi_vnic_device *vdev)
+{
+	struct module *owner = vdev->dev.parent->driver->owner;
+
+	if (owner && !try_module_get(owner))
+		return -ENXIO;
+
+	get_device(&vdev->dev);
+	return 0;
+}
+
+/* hfi_vdev_put - Put module and vdev reference counts */
+static inline void hfi_vdev_put(struct hfi_vnic_device *vdev)
+{
+	struct module *owner = vdev->dev.parent->driver->owner;
+
+	put_device(&vdev->dev);
+	module_put(owner);
+}
+
+#endif /* _HFI_VNIC_H */
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [RFC 03/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) netdev driver
From: Vishwanathapura, Niranjana @ 2016-11-18 22:42 UTC (permalink / raw)
  To: Doug Ledford
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Dennis Dalessandro, Niranjana Vishwanathapura, Sadanand Warrier,
	Sudeep Dutt, Tanya K Jajodia, Andrzej Kacprowski
In-Reply-To: <1479508938-63799-1-git-send-email-niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

HFI VNIC netdev driver supports Ethernet functionality over Omni-Path
fabric by encapsulating Ethernet packets inside Omni-Path packet header.
It interfaces with the network stack to provide standard Ethernet network
interfaces to the user. It binds with the HFI VNIC device and invokes the
bus operations supported by it.

Change-Id: I2613b2c36e548182828e732181e4bde99b8d01dc
Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Sadanand Warrier <sadanand.warrier-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Sudeep Dutt <sudeep.dutt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Tanya K Jajodia <tanya.k.jajodia-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/Kconfig                         |   1 +
 drivers/infiniband/sw/intel/vnic/Makefile          |   1 +
 drivers/infiniband/sw/intel/vnic/hfi_vnic/Kconfig  |   8 +
 drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile |   7 +
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_encap.c        | 239 +++++++++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h        |  62 +++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_ethtool.c      |  81 ++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h     | 221 ++++++++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c       | 469 +++++++++++++++++++++
 9 files changed, 1089 insertions(+)
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/Kconfig
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_ethtool.c
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 7fe9095..0c419d2 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
 source "drivers/infiniband/sw/intel/vnic/hfi_vnic_bus/Kconfig"
+source "drivers/infiniband/sw/intel/vnic/hfi_vnic/Kconfig"
 source "drivers/infiniband/sw/rdmavt/Kconfig"
 source "drivers/infiniband/sw/rxe/Kconfig"
 
diff --git a/drivers/infiniband/sw/intel/vnic/Makefile b/drivers/infiniband/sw/intel/vnic/Makefile
index 083e55b..bb22e22 100644
--- a/drivers/infiniband/sw/intel/vnic/Makefile
+++ b/drivers/infiniband/sw/intel/vnic/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_HFI_VNIC_BUS)		+= hfi_vnic_bus/
+obj-$(CONFIG_HFI_VNIC)			+= hfi_vnic/
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/Kconfig b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Kconfig
new file mode 100644
index 0000000..d03efc9
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Kconfig
@@ -0,0 +1,8 @@
+config HFI_VNIC
+	tristate "Intel HFI VNIC support"
+	depends on X86_64 && INFINIBAND && HFI_VNIC_BUS
+	---help---
+	This is HFI Virtual Network Interface Controller (VNIC) driver
+	for Ethernet over HFI feature. It implements the HW independent
+	VNIC functionality. It interfaces with Linux stack for data path
+	and IB MAD for the control path.
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
new file mode 100644
index 0000000..a05b2f5
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
@@ -0,0 +1,7 @@
+# Makefile - Intel HFI Virtual Network Controller driver
+# Copyright(c) 2016, Intel Corporation.
+#
+ccflags-y += -I$(src)/../include
+obj-$(CONFIG_HFI_VNIC) += hfi_vnic.o
+
+hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.c b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.c
new file mode 100644
index 0000000..9804c6d
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI VNIC encapsulation/decapsulation function.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+
+#include "hfi_vnic.h"
+#include "hfi_vnic_internal.h"
+
+/**
+ * union hfi_vnic_bypass_hdr - VNIC bypass header
+ * @slid: source lid
+ * @length: length of packet
+ * @becn: backward explicit congestion notification
+ * @dlid: destination lid
+ * @sc: service class
+ * @fecn: forward explicit congestion notification
+ * @l2: L2 type (2=16B)
+ * @lt: link transfer field
+ * @l4: L4 type
+ * @slid_high: upper 4 bits of source lid
+ * @dlid_high: upper 4 bits of destination lid
+ * @pkey: partition key
+ * @entropy: entropy
+ * @age: packet age
+ * @l4_hdr: L4 header
+ */
+union hfi_vnic_bypass_hdr {
+	struct {
+	struct {
+		uint64_t slid   : 20;
+		uint64_t length : 11;
+		uint64_t becn   : 1;
+		uint64_t dlid   : 20;
+		uint64_t sc     : 5;
+		uint64_t rsvd   : 3;
+		uint64_t fecn   : 1;
+		uint64_t l2     : 2;
+		uint64_t lt     : 1;
+	};
+	struct {
+		uint64_t l4        : 8;
+		uint64_t slid_high : 4;
+		uint64_t dlid_high : 4;
+		uint64_t pkey      : 16;
+		uint64_t entropy   : 16;
+		uint64_t age       : 8;
+		uint64_t rsvd1     : 8;
+	};
+	struct {
+		uint32_t rsvd2  : 16;
+		uint32_t l4_hdr : 16;
+	};
+	} __packed;
+	u32 dw[5];
+};
+
+#define HFI_VNIC_SC_MASK 0x1f
+
+/* hfi_vnic_get_dlid - find and return the DLID */
+static uint32_t hfi_vnic_get_dlid(struct hfi_vnic_adapter *adapter,
+				  struct sk_buff *skb, u8 def_port)
+{
+	struct __hfi_veswport_info *info = &adapter->info;
+	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+	u32 dlid;
+
+	if (is_multicast_ether_addr(mac_hdr->h_dest)) {
+		dlid = info->vesw.u_mcast_dlid;
+	} else {
+		if (is_local_ether_addr(mac_hdr->h_dest)) {
+			dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
+				((uint32_t)mac_hdr->h_dest[4] << 8)  |
+				mac_hdr->h_dest[3];
+			if (unlikely(!dlid))
+				v_warn("Null dlid in MAC address\n");
+		} else if (def_port != HFI_VNIC_INVALID_PORT) {
+			dlid = info->vesw.u_ucast_dlid[def_port];
+		}
+	}
+
+	return dlid;
+}
+
+/* hfi_vnic_get_sc - return the service class */
+static u8 hfi_vnic_get_sc(struct __hfi_veswport_info *info,
+			  struct sk_buff *skb)
+{
+	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+	u16 vlan_tci;
+	u8 sc;
+
+	if (!__vlan_get_tag(skb, &vlan_tci)) {
+		u8 pcp = HFI_VNIC_VLAN_PCP(vlan_tci);
+
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			sc = info->vport.pcp_to_sc_mc[pcp];
+		else
+			sc = info->vport.pcp_to_sc_uc[pcp];
+	} else {
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			sc = info->vport.non_vlan_sc_mc;
+		else
+			sc = info->vport.non_vlan_sc_uc;
+	}
+
+	return sc & HFI_VNIC_SC_MASK;
+}
+
+/* hfi_vnic_calc_entropy - calculate the packet entropy */
+u8 hfi_vnic_calc_entropy(struct hfi_vnic_adapter *adapter, struct sk_buff *skb)
+{
+	u16 hash16;
+
+	/*
+	 * Get flow based 16-bit hash and then XOR the upper and lower bytes
+	 * to get the entropy.
+	 * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
+	 */
+	hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
+	return (u8)((hash16 >> 8) ^ (hash16 & 0xff));
+}
+
+/* hfi_vnic_get_def_port - get default port based on entropy */
+static inline u8 hfi_vnic_get_def_port(struct hfi_vnic_adapter *adapter,
+				       u8 entropy)
+{
+	u8 flow_id;
+
+	/* Add the upper and lower 4-bits of entropy to get the flow id */
+	flow_id = ((entropy & 0xf) + (entropy >> 4));
+	return adapter->flow_tbl[flow_id & (HFI_VNIC_FLOW_TBL_SIZE - 1)];
+}
+
+/* Calculate packet length including OPA header, crc and padding */
+static inline int hfi_vnic_wire_length(struct sk_buff *skb)
+{
+	u32 pad_len, hlen = HFI_VNIC_HDR_LEN;
+
+	/* padding for 8 bytes size alignment */
+	pad_len = -(skb->len + hlen + HFI_VNIC_ICRC_TAIL_LEN) & 0x7;
+	pad_len += HFI_VNIC_ICRC_TAIL_LEN;
+
+	return (skb->len + hlen + pad_len) >> 3;
+}
+
+/* hfi_vnic_encap_skb - encapsulate skb (ethernet) packet with OPA header */
+int hfi_vnic_encap_skb(struct hfi_vnic_adapter *adapter, struct sk_buff *skb)
+{
+	struct __hfi_veswport_info *info = &adapter->info;
+	union hfi_vnic_bypass_hdr *hdr;
+	u32 dlid;
+	u8 def_port;
+
+	hdr = (union hfi_vnic_bypass_hdr *)(skb->data - HFI_VNIC_HDR_LEN);
+	memset(hdr, 0, HFI_VNIC_HDR_LEN);
+
+	hdr->entropy = hfi_vnic_calc_entropy(adapter, skb);
+	def_port = hfi_vnic_get_def_port(adapter, hdr->entropy);
+
+	hdr->slid = info->vport.encap_slid;
+	hdr->slid_high = info->vport.encap_slid >> 20;
+
+	dlid = hfi_vnic_get_dlid(adapter, skb, def_port);
+	if (unlikely(!dlid))
+		return -EFAULT;
+
+	hdr->dlid = dlid;
+	hdr->dlid_high = dlid >> 20;
+
+	hdr->length = hfi_vnic_wire_length(skb);
+	hdr->sc = hfi_vnic_get_sc(info, skb);
+
+	hdr->l2 = HFI_VNIC_L2_TYPE;
+	hdr->lt = 1;
+
+	hdr->pkey = info->vesw.pkey;
+
+	hdr->l4 = HFI_VNIC_L4_ETHR;
+	hdr->l4_hdr = info->vesw.vesw_id;
+
+	skb_push(skb, HFI_VNIC_HDR_LEN);
+	return 0;
+}
+
+/* hfi_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
+int hfi_vnic_decap_skb(struct hfi_vnic_rx_queue *rxq, struct sk_buff *skb)
+{
+	skb_pull(skb, HFI_VNIC_HDR_LEN);
+	return 0;
+}
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
new file mode 100644
index 0000000..6786cce
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
@@ -0,0 +1,62 @@
+#ifndef _HFI_VNIC_ENCAP_H
+#define _HFI_VNIC_ENCAP_H
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains all HFI VNIC declaration required for encapsulation
+ * and decapsulation of Ethernet packets
+ */
+
+#define HFI_VESW_MAX_NUM_DEF_PORT   16
+#define HFI_VNIC_MAX_NUM_PCP        8
+
+/* VNIC configured and operational state values */
+#define HFI_VNIC_STATE_DROP_ALL        0x1
+#define HFI_VNIC_STATE_FORWARDING      0x3
+
+#endif /* _HFI_VNIC_ENCAP_H */
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_ethtool.c b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_ethtool.c
new file mode 100644
index 0000000..32bb9ce
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_ethtool.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI VNIC ethtool functions
+ */
+
+#include <linux/ethtool.h>
+
+#include "hfi_vnic.h"
+#include "hfi_vnic_internal.h"
+
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+			     struct ethtool_drvinfo *drvinfo)
+{
+	struct hfi_vnic_adapter *adapter = netdev_priv(netdev);
+	struct hfi_vnic_device *vdev = adapter->vdev;
+
+	strlcpy(drvinfo->driver, hfi_vnic_driver_name, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, hfi_vnic_driver_version,
+		sizeof(drvinfo->version));
+	strlcpy(drvinfo->bus_info, dev_name(&vdev->dev),
+		sizeof(drvinfo->bus_info));
+}
+
+/* ethtool ops */
+static const struct ethtool_ops hfi_vnic_ethtool_ops = {
+	.get_drvinfo = vnic_get_drvinfo,
+	.get_link = ethtool_op_get_link,
+};
+
+/* hfi_vnic_set_ethtool_ops - set ethtool ops */
+void hfi_vnic_set_ethtool_ops(struct net_device *ndev)
+{
+	ndev->ethtool_ops = &hfi_vnic_ethtool_ops;
+}
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
new file mode 100644
index 0000000..8a5e0c1
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
@@ -0,0 +1,221 @@
+#ifndef _HFI_VNIC_INTERNAL_H
+#define _HFI_VNIC_INTERNAL_H
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI VNIC driver internal declarations
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/hashtable.h>
+#include <linux/sizes.h>
+
+#include "hfi_vnic_encap.h"
+
+/* VNIC uses 16B header format */
+#define HFI_VNIC_L2_TYPE    0x2
+
+/* 16 header bytes + 2 reserved bytes */
+#define HFI_VNIC_L2_HDR_LEN   (16 + 2)
+
+#define HFI_VNIC_L4_HDR_LEN   2
+
+#define HFI_VNIC_HDR_LEN      (HFI_VNIC_L2_HDR_LEN + \
+			       HFI_VNIC_L4_HDR_LEN)
+
+#define HFI_VNIC_L4_ETHR  0x78
+
+#define HFI_VNIC_ICRC_LEN   4
+#define HFI_VNIC_TAIL_LEN   1
+#define HFI_VNIC_ICRC_TAIL_LEN  (HFI_VNIC_ICRC_LEN + HFI_VNIC_TAIL_LEN)
+
+#define HFI_VNIC_VLAN_PCP(vlan_tci)  \
+			(((vlan_tci) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
+
+#define HFI_VNIC_SKB_HEADROOM ALIGN(HFI_VNIC_HDR_LEN, 8)
+
+/* Flow to default port redirection table size */
+#define HFI_VNIC_FLOW_TBL_SIZE    32
+
+/* Invalid port number */
+#define HFI_VNIC_INVALID_PORT     0xff
+
+enum hfi_vnic_flags_t {
+	HFI_VNIC_UP,
+	HFI_VNIC_OPEN,
+};
+
+struct hfi_vnic_adapter;
+
+/**
+ * struct __hfi_vesw_info - HFI vnic virtual switch info
+ */
+struct __hfi_vesw_info {
+	u16  fabric_id;
+	u16  vesw_id;
+
+	u8   rsvd0[6];
+	u16  def_fw_id_mask;
+
+	u8   rsvd1[2];
+	u16  pkey;
+
+	u8   rsvd2[4];
+	u32  u_mcast_dlid;
+	u32  u_ucast_dlid[HFI_VESW_MAX_NUM_DEF_PORT];
+
+	u8   rsvd3[44];
+	u16  eth_mtu[HFI_VNIC_MAX_NUM_PCP];
+	u16  eth_mtu_non_vlan;
+	u8   rsvd4[2];
+} __packed;
+
+/**
+ * struct __hfi_per_veswport_info - HFI vnic per port info
+ */
+struct __hfi_per_veswport_info {
+	u32  port_num;
+
+	u8   eth_link_status;
+	u8   rsvd0[3];
+
+	u8   base_mac_addr[ETH_ALEN];
+	u8   config_state;
+	u8   oper_state;
+
+	u16  max_mac_tbl_ent;
+	u16  max_smac_ent;
+	u32  mac_tbl_digest;
+	u8   rsvd1[4];
+
+	u32  encap_slid;
+
+	u8   pcp_to_sc_uc[HFI_VNIC_MAX_NUM_PCP];
+	u8   pcp_to_vl_uc[HFI_VNIC_MAX_NUM_PCP];
+	u8   pcp_to_sc_mc[HFI_VNIC_MAX_NUM_PCP];
+	u8   pcp_to_vl_mc[HFI_VNIC_MAX_NUM_PCP];
+
+	u8   non_vlan_sc_uc;
+	u8   non_vlan_vl_uc;
+	u8   non_vlan_sc_mc;
+	u8   non_vlan_vl_mc;
+
+	u8   rsvd2[48];
+
+	u16  uc_macs_gen_count;
+	u16  mc_macs_gen_count;
+
+	u8   rsvd3[8];
+} __packed;
+
+/**
+ * struct __hfi_veswport_info - HFI vnic port info
+ */
+struct __hfi_veswport_info {
+	struct __hfi_vesw_info            vesw;
+	struct __hfi_per_veswport_info    vport;
+};
+
+/**
+ * struct hfi_vnic_rx_queue - HFI VNIC receive queue
+ * @idx: queue index
+ * @adapter: netdev adapter
+ * @napi: netdev napi structure
+ */
+struct hfi_vnic_rx_queue {
+	u8                        idx;
+	struct hfi_vnic_adapter  *adapter;
+	struct napi_struct        napi;
+};
+
+/**
+ * struct hfi_vnic_adapter - HFI VNIC netdev private data structure
+ * @netdev: pointer to associated netdev
+ * @vdev: pointer to hfi vnic device
+ * @flags: flags indicating various states
+ * @lock: adapter lock
+ * @rxq: receive queue array
+ * @info: virtual ethernet switch port information
+ * @flow_tbl: flow to default port redirection table
+ */
+struct hfi_vnic_adapter {
+	struct net_device        *netdev;
+	struct hfi_vnic_device   *vdev;
+	unsigned long             flags;
+
+	/* Lock used around state updates */
+	struct mutex              lock;
+
+	struct hfi_vnic_rx_queue  rxq[HFI_VNIC_MAX_QUEUE];
+
+	struct __hfi_veswport_info info;
+
+	u8 flow_tbl[HFI_VNIC_FLOW_TBL_SIZE];
+};
+
+#define v_dbg(format, arg...) \
+	netdev_dbg(adapter->netdev, format, ## arg)
+#define v_err(format, arg...) \
+	netdev_err(adapter->netdev, format, ## arg)
+#define v_info(format, arg...) \
+	netdev_info(adapter->netdev, format, ## arg)
+#define v_warn(format, arg...) \
+	netdev_warn(adapter->netdev, format, ## arg)
+#define v_notice(format, arg...) \
+	netdev_notice(adapter->netdev, format, ## arg)
+
+extern char hfi_vnic_driver_name[];
+extern const char hfi_vnic_driver_version[];
+
+int hfi_vnic_encap_skb(struct hfi_vnic_adapter *adapter, struct sk_buff *skb);
+int hfi_vnic_decap_skb(struct hfi_vnic_rx_queue *rxq, struct sk_buff *skb);
+u8 hfi_vnic_calc_entropy(struct hfi_vnic_adapter *adapter, struct sk_buff *skb);
+void hfi_vnic_set_ethtool_ops(struct net_device *ndev);
+
+#endif /* _HFI_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
new file mode 100644
index 0000000..7121637
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI Virtual Network Interface Controller (VNIC) driver
+ */
+
+#include <linux/module.h>
+#include <linux/if_vlan.h>
+
+#include "hfi_vnic.h"
+#include "hfi_vnic_internal.h"
+
+#define DRV_VERSION "1.0"
+char hfi_vnic_driver_name[] = "hfi_vnic";
+const char hfi_vnic_driver_version[] = DRV_VERSION;
+
+#define HFI_TX_TIMEOUT_MS 1000
+
+#define HFI_VNIC_MIN_ETH_MTU (ETH_ZLEN - ETH_HLEN)
+
+/* hfi_vnic_maybe_stop_tx - stop tx queue if required */
+static void hfi_vnic_maybe_stop_tx(struct hfi_vnic_adapter *adapter, u8 q_idx)
+{
+	struct hfi_vnic_device *vdev = adapter->vdev;
+
+	netif_stop_subqueue(vdev->netdev, q_idx);
+	if (!vdev->bus_ops->get_write_avail(vdev, q_idx))
+		return;
+
+	netif_start_subqueue(vdev->netdev, q_idx);
+}
+
+/* hfi_netdev_start_xmit - transmit function */
+static netdev_tx_t hfi_netdev_start_xmit(struct sk_buff *skb,
+					 struct net_device *netdev)
+{
+	struct hfi_vnic_adapter *adapter = netdev_priv(netdev);
+	struct hfi_vnic_device *vdev = adapter->vdev;
+	u8 q_idx = skb->queue_mapping;
+	bool skip_skb_free = false;
+	int rc = -1;
+
+	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
+	if (unlikely(adapter->info.vport.oper_state !=
+		     HFI_VNIC_STATE_FORWARDING))
+		goto tx_finish;
+
+	/* pad to ensure mininum ethernet packet length */
+	if (unlikely(skb->len < ETH_ZLEN)) {
+		if (skb_padto(skb, ETH_ZLEN)) {
+			skip_skb_free = true;
+			goto tx_finish;
+		}
+		skb_put(skb, ETH_ZLEN - skb->len);
+	}
+
+	rc = hfi_vnic_encap_skb(adapter, skb);
+	if (unlikely(rc))
+		goto tx_finish;
+
+	/* Get reference to skb as hfi driver might release it */
+	skb_get(skb);
+	rc = vdev->bus_ops->put_skb(vdev, q_idx, skb);
+	/* remove the header */
+	skb_pull(skb, HFI_VNIC_HDR_LEN);
+
+tx_finish:
+	if (unlikely(rc == -EBUSY)) {
+		hfi_vnic_maybe_stop_tx(adapter, q_idx);
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_BUSY;
+	}
+
+	if (!skip_skb_free)
+		dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+/* vnic_handle_rx - handle skb receive */
+static void vnic_handle_rx(struct hfi_vnic_rx_queue *rxq,
+			   int *work_done, int work_to_do)
+{
+	struct hfi_vnic_adapter *adapter = rxq->adapter;
+	struct hfi_vnic_device *vdev = adapter->vdev;
+	struct sk_buff *skb;
+
+	while (1) {
+		if (*work_done >= work_to_do)
+			break;
+
+		skb = vdev->bus_ops->get_skb(vdev, rxq->idx);
+		if (!skb)
+			break;
+
+		if (hfi_vnic_decap_skb(rxq, skb)) {
+			dev_kfree_skb_any(skb);
+			continue;
+		}
+
+		skb_checksum_none_assert(skb);
+		skb->protocol = eth_type_trans(skb, vdev->netdev);
+
+		napi_gro_receive(&rxq->napi, skb);
+		(*work_done)++;
+	}
+}
+
+/* vnic_napi - napi receive polling callback function */
+static int vnic_napi(struct napi_struct *napi, int budget)
+{
+	struct hfi_vnic_rx_queue *rxq = container_of(napi,
+					     struct hfi_vnic_rx_queue, napi);
+	struct hfi_vnic_adapter *adapter = rxq->adapter;
+	struct hfi_vnic_device *vdev = adapter->vdev;
+	u8 evt = rxq->idx + HFI_VNIC_EVT_RX0;
+	int work_done = 0;
+
+	v_dbg("napi %d budget %d\n", rxq->idx, budget);
+	vnic_handle_rx(rxq, &work_done, budget);
+
+	v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
+	if (work_done < budget) {
+		napi_complete(napi);
+		vdev->bus_ops->config_notify(vdev, evt, true);
+	}
+
+	return work_done;
+}
+
+/* vnic_event_cb - handle events from vnic hfi driver */
+static void vnic_event_cb(struct hfi_vnic_device *vdev, u8 evt)
+{
+	struct hfi_vnic_adapter *adapter = netdev_priv(vdev->netdev);
+	struct hfi_vnic_rx_queue *rxq;
+	u8 q_idx;
+
+	v_dbg("received event %d\n", evt);
+	if (evt < vdev->hfi_info.num_rx_q) {
+		q_idx = evt;
+		if (unlikely(adapter->info.vport.oper_state !=
+			     HFI_VNIC_STATE_FORWARDING))
+			return;
+
+		rxq = &adapter->rxq[q_idx];
+		if (napi_schedule_prep(&rxq->napi)) {
+			v_dbg("napi %d scheduling\n", q_idx);
+			vdev->bus_ops->config_notify(vdev, evt, false);
+			__napi_schedule(&rxq->napi);
+		}
+		return;
+	}
+	if ((evt >= HFI_VNIC_EVT_TX0) &&
+	    (evt < (HFI_VNIC_EVT_TX0 + vdev->hfi_info.num_tx_q))) {
+		q_idx = evt - HFI_VNIC_EVT_TX0;
+
+		if (__netif_subqueue_stopped(vdev->netdev, q_idx))
+			netif_wake_subqueue(vdev->netdev, q_idx);
+
+		return;
+	}
+	v_err("Invalid event\n");
+}
+
+static u16 hfi_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
+				 void *accel_priv,
+				 select_queue_fallback_t fallback)
+{
+	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+	struct hfi_vnic_adapter *adapter = netdev_priv(netdev);
+	struct __hfi_veswport_info *info = &adapter->info;
+	struct hfi_vnic_device *vdev = adapter->vdev;
+	u8 vl, entropy;
+
+	if (skb_vlan_tag_present(skb)) {
+		u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
+
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			vl = info->vport.pcp_to_vl_mc[pcp];
+		else
+			vl = info->vport.pcp_to_vl_uc[pcp];
+	} else {
+		if (is_multicast_ether_addr(mac_hdr->h_dest))
+			vl = info->vport.non_vlan_vl_mc;
+		else
+			vl = info->vport.non_vlan_vl_uc;
+	}
+
+	entropy =  hfi_vnic_calc_entropy(adapter, skb);
+	return vdev->bus_ops->select_queue(vdev, vl, entropy);
+}
+
+/* hfi_netdev_change_mtu - change the MTU */
+static int hfi_netdev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct hfi_vnic_adapter *adapter = netdev_priv(netdev);
+	struct __hfi_veswport_info *info = &adapter->info;
+	u16 min_mtu = HFI_VNIC_MIN_ETH_MTU;
+	u16 max_mtu = max(min_mtu, info->vesw.eth_mtu_non_vlan);
+
+	/* Supported MTUs */
+	if ((new_mtu < min_mtu) || (new_mtu > max_mtu)) {
+		v_err("Unsupported MTU setting\n");
+		return -EINVAL;
+	}
+
+	v_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+	return 0;
+}
+
+/* hfi_vnic_up - enable vnic data flow */
+static int hfi_vnic_up(struct hfi_vnic_adapter *adapter)
+{
+	struct hfi_vnic_device *vdev = adapter->vdev;
+	int i, rc;
+
+	rc = vdev->bus_ops->open(vdev, vnic_event_cb);
+	if (rc) {
+		v_dbg("hfi_open failed %d\n", rc);
+		return rc;
+	}
+
+	netif_carrier_on(adapter->netdev);
+	netif_tx_start_all_queues(adapter->netdev);
+	for (i = 0; i < vdev->hfi_info.num_rx_q; i++)
+		napi_enable(&adapter->rxq[i].napi);
+
+	set_bit(HFI_VNIC_UP, &adapter->flags);
+	return 0;
+}
+
+/* hfi_vnic_down - disable vnic data flow */
+static void hfi_vnic_down(struct hfi_vnic_adapter *adapter)
+{
+	struct hfi_vnic_device *vdev = adapter->vdev;
+	int i;
+
+	netif_carrier_off(adapter->netdev);
+	netif_tx_disable(adapter->netdev);
+	for (i = 0; i < vdev->hfi_info.num_rx_q; i++)
+		napi_disable(&adapter->rxq[i].napi);
+
+	vdev->bus_ops->close(vdev);
+	clear_bit(HFI_VNIC_UP, &adapter->flags);
+}
+
+/* hfi_vnic_set_mac_addr - change mac address */
+static int hfi_vnic_set_mac_addr(struct net_device *netdev, void *addr)
+{
+	struct hfi_vnic_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	mutex_lock(&adapter->lock);
+	rc = eth_mac_addr(netdev, addr);
+	mutex_unlock(&adapter->lock);
+
+	return rc;
+}
+
+/* hfi_netdev_open - activate network interface */
+static int hfi_netdev_open(struct net_device *netdev)
+{
+	struct hfi_vnic_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	mutex_lock(&adapter->lock);
+	rc = hfi_vnic_up(adapter);
+	if (rc)
+		goto open_done;
+
+	set_bit(HFI_VNIC_OPEN, &adapter->flags);
+	v_info("opened\n");
+open_done:
+	mutex_unlock(&adapter->lock);
+	return rc;
+}
+
+/* hfi_netdev_close - disable network interface */
+static int hfi_netdev_close(struct net_device *netdev)
+{
+	struct hfi_vnic_adapter *adapter = netdev_priv(netdev);
+
+	mutex_lock(&adapter->lock);
+	if (test_bit(HFI_VNIC_UP, &adapter->flags))
+		hfi_vnic_down(adapter);
+
+	clear_bit(HFI_VNIC_OPEN, &adapter->flags);
+	mutex_unlock(&adapter->lock);
+	v_info("closed\n");
+	return 0;
+}
+
+/* netdev ops */
+static const struct net_device_ops hfi_netdev_ops = {
+	.ndo_open = hfi_netdev_open,
+	.ndo_stop = hfi_netdev_close,
+	.ndo_start_xmit = hfi_netdev_start_xmit,
+	.ndo_change_mtu = hfi_netdev_change_mtu,
+	.ndo_select_queue = hfi_vnic_select_queue,
+	.ndo_set_mac_address = hfi_vnic_set_mac_addr,
+};
+
+/* hfi_vnic_drv_probe - device initialization routine */
+static int hfi_vnic_drv_probe(struct device *dev)
+{
+	struct net_device *netdev;
+	struct hfi_vnic_adapter *adapter;
+	struct hfi_vnic_device *vdev = container_of(dev,
+					    struct hfi_vnic_device, dev);
+	int i, rc;
+
+	if (vdev->hfi_info.num_rx_q > HFI_VNIC_MAX_QUEUE ||
+	    vdev->hfi_info.num_tx_q > HFI_VNIC_MAX_QUEUE) {
+		dev_err(dev, "Number of VNIC (rx %d, tx %d) queues > Max Queue Size (%d)",
+			vdev->hfi_info.num_rx_q, vdev->hfi_info.num_tx_q,
+			HFI_VNIC_MAX_QUEUE);
+		return -EINVAL;
+	}
+	netdev = alloc_etherdev_mqs(sizeof(struct hfi_vnic_adapter),
+				    vdev->hfi_info.num_tx_q,
+				    vdev->hfi_info.num_rx_q);
+	if (!netdev)
+		return -ENOMEM;
+	adapter = netdev_priv(netdev);
+	adapter->netdev = netdev;
+	adapter->vdev = vdev;
+	vdev->netdev = netdev;
+	netdev->features = NETIF_F_HIGHDMA;
+	if (vdev->hfi_info.cap & HFI_VNIC_CAP_SG)
+		netdev->features |= NETIF_F_SG;
+	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	netdev->hw_features = netdev->features;
+	netdev->vlan_features = netdev->features;
+	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
+	netdev->netdev_ops = &hfi_netdev_ops;
+	netdev->hard_header_len += HFI_VNIC_SKB_HEADROOM;
+	mutex_init(&adapter->lock);
+	strcpy(netdev->name, "veth%d");
+
+	hfi_vnic_set_ethtool_ops(netdev);
+	for (i = 0; i < vdev->hfi_info.num_rx_q; i++) {
+		adapter->rxq[i].idx = i;
+		adapter->rxq[i].adapter = adapter;
+		netif_napi_add(netdev, &adapter->rxq[i].napi, vnic_napi, 64);
+	}
+
+	rc = vdev->bus_ops->init(vdev);
+	if (rc)
+		goto hw_err;
+
+	rc = register_netdev(netdev);
+	if (rc)
+		goto netdev_err;
+
+	netif_carrier_off(netdev);
+	v_info("initialized\n");
+
+	return 0;
+
+netdev_err:
+	vdev->bus_ops->deinit(vdev);
+hw_err:
+	mutex_destroy(&adapter->lock);
+	free_netdev(netdev);
+	dev_err(dev, "initialization failed %d\n", rc);
+
+	return rc;
+}
+
+/* hfi_vnic_drv_remove - device removal routine */
+static int hfi_vnic_drv_remove(struct device *dev)
+{
+	struct hfi_vnic_device *vdev = container_of(dev,
+					    struct hfi_vnic_device, dev);
+	struct hfi_vnic_adapter *adapter = netdev_priv(vdev->netdev);
+
+	unregister_netdev(vdev->netdev);
+	vdev->bus_ops->deinit(vdev);
+	mutex_destroy(&adapter->lock);
+	free_netdev(vdev->netdev);
+
+	dev_info(dev, "removed\n");
+	return 0;
+}
+
+/* HFI Virtual Network Driver */
+static struct hfi_vnic_driver hfi_vnic_drv = {
+	.drvwrap = {
+		.type = HFI_VNIC_DRV,
+		.driver = {
+			.name   = hfi_vnic_driver_name,
+			.probe  = hfi_vnic_drv_probe,
+			.remove = hfi_vnic_drv_remove
+		}
+	}
+};
+
+/* hfi_vnic_init_module - driver registration routine */
+static int __init hfi_vnic_init_module(void)
+{
+	int rc;
+
+	pr_info("HFI Virtual Network Driver - %s\n",
+		hfi_vnic_driver_version);
+
+	rc = hfi_vnic_driver_register(&hfi_vnic_drv);
+	if (rc)
+		pr_err("VNIC driver register failed %d\n", rc);
+
+	return rc;
+}
+module_init(hfi_vnic_init_module);
+
+/* hfi_vnic_exit_module - driver Exit cleanup routine */
+static void __exit hfi_vnic_exit_module(void)
+{
+	hfi_vnic_driver_unregister(&hfi_vnic_drv);
+}
+module_exit(hfi_vnic_exit_module);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel HFI Virtual Network Controller driver");
+MODULE_VERSION(DRV_VERSION);
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [RFC 04/10] IB/hfi-vnic: VNIC Ethernet Management (EM) structure definitions
From: Vishwanathapura, Niranjana @ 2016-11-18 22:42 UTC (permalink / raw)
  To: Doug Ledford
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Dennis Dalessandro, Niranjana Vishwanathapura, Sadanand Warrier,
	Tanya K Jajodia
In-Reply-To: <1479508938-63799-1-git-send-email-niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Define VNIC EM MAD structures and the associated macros. These structures
are used for information exchange between VNIC EM agent on the HFI host
and the Ethernet manager.

Change-Id: If4837ec74e5b0eecc81774a52ab92fffea4b6338
Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Sadanand Warrier <sadanand.warrier-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Tanya K Jajodia <tanya.k.jajodia-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h        | 444 +++++++++++++++++++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h     |  35 +-
 2 files changed, 478 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
index 6786cce..9ed5221 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
@@ -52,11 +52,455 @@
  * and decapsulation of Ethernet packets
  */
 
+#include <linux/types.h>
+#include <rdma/ib_mad.h>
+
+/* Maximum number of vnics supported */
+#define HFI_MAX_VPORTS_SUPPORTED 256
+
+/* EMA class version */
+#define HFI_EMA_CLASS_VERSION               0x80
+
+/*
+ * Define the Intel vendor management class for HFI
+ * ETHERNET MANAGEMENT
+ */
+#define HFI_MGMT_CLASS_INTEL_EMA            0x34
+
+/* EM attribute IDs */
+#define HFI_EM_ATTR_CLASS_PORT_INFO                 0x0001
+#define HFI_EM_ATTR_VESWPORT_INFO                   0x0011
+#define HFI_EM_ATTR_VESWPORT_MAC_ENTRIES            0x0012
+#define HFI_EM_ATTR_IFACE_UCAST_MACS                0x0013
+#define HFI_EM_ATTR_IFACE_MCAST_MACS                0x0014
+#define HFI_EM_ATTR_DELETE_VESW                     0x0015
+#define HFI_EM_ATTR_VESWPORT_SUMMARY_COUNTERS       0x0020
+#define HFI_EM_ATTR_VESWPORT_ERROR_COUNTERS         0x0022
+
 #define HFI_VESW_MAX_NUM_DEF_PORT   16
 #define HFI_VNIC_MAX_NUM_PCP        8
 
+#define HFI_VNIC_EMA_DATA    (OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define HFI_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define HFI_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define HFI_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define HFI_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
 /* VNIC configured and operational state values */
 #define HFI_VNIC_STATE_DROP_ALL        0x1
 #define HFI_VNIC_STATE_FORWARDING      0x3
 
+/**
+ * struct hfi_vesw_info - HFI vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct hfi_vesw_info {
+	__be16  fabric_id;
+	__be16  vesw_id;
+
+	u8      rsvd0[6];
+	__be16  def_port_mask;
+
+	u8      rsvd1[2];
+	__be16  pkey;
+
+	u8      rsvd2[4];
+	__be32  u_mcast_dlid;
+	__be32  u_ucast_dlid[HFI_VESW_MAX_NUM_DEF_PORT];
+
+	u8      rsvd3[44];
+	__be16  eth_mtu[HFI_VNIC_MAX_NUM_PCP];
+	__be16  eth_mtu_non_vlan;
+	u8      rsvd4[2];
+} __packed;
+
+/**
+ * struct hfi_per_veswport_info - HFI vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct hfi_per_veswport_info {
+	__be32  port_num;
+
+	u8      eth_link_status;
+	u8      rsvd0[3];
+
+	u8      base_mac_addr[ETH_ALEN];
+	u8      config_state;
+	u8      oper_state;
+
+	__be16  max_mac_tbl_ent;
+	__be16  max_smac_ent;
+	__be32  mac_tbl_digest;
+	u8      rsvd1[4];
+
+	__be32  encap_slid;
+
+	u8      pcp_to_sc_uc[HFI_VNIC_MAX_NUM_PCP];
+	u8      pcp_to_vl_uc[HFI_VNIC_MAX_NUM_PCP];
+	u8      pcp_to_sc_mc[HFI_VNIC_MAX_NUM_PCP];
+	u8      pcp_to_vl_mc[HFI_VNIC_MAX_NUM_PCP];
+
+	u8      non_vlan_sc_uc;
+	u8      non_vlan_vl_uc;
+	u8      non_vlan_sc_mc;
+	u8      non_vlan_vl_mc;
+
+	u8      rsvd2[48];
+
+	__be16  uc_macs_gen_count;
+	__be16  mc_macs_gen_count;
+
+	u8      rsvd3[8];
+} __packed;
+
+/**
+ * struct hfi_veswport_info - HFI vnic port information
+ * @vesw: HFI vnic switch information
+ * @vport: HFI vnic per port information
+ *
+ * On host, each of the virtual ethernet ports belongs
+ * to a different virtual ethernet switches.
+ */
+struct hfi_veswport_info {
+	struct hfi_vesw_info          vesw;
+	struct hfi_per_veswport_info  vport;
+};
+
+/**
+ * union __hfi_vnic_dlid_sd - vnic dlid and side data needed.
+ * @sd_is_src_mac: 1 = entry is SMAC, 0 = not SMAC
+ * @dlid: Destination lid corresponding to MAC addr
+ */
+union __hfi_vnic_dlid_sd {
+	struct {
+		u32  sd_reserved    : 5;
+		u32  sd_is_src_mac  : 1;
+		u32  rsvd0          : 2;
+		u32  dlid           : 24;
+	};
+	u32 dw;
+};
+
+/* Same as __hfi_vnic_dlid_sd, but with a big endian attribute */
+union hfi_vnic_dlid_sd {
+	union __hfi_vnic_dlid_sd u;
+	__be32 dw;
+};
+
+/**
+ * struct hfi_veswport_mactable_entry - single entry in the forwarding table
+ * @mac_addr: MAC address
+ * @mac_addr_mask: MAC address bit mask
+ * @dlid_sd: Matching DLID and side data
+ *
+ * On the host each virtual ethernet port will have
+ * a forwarding table. These tables are used to
+ * map a MAC to a LID and other data. For more
+ * details see struct hfi_veswport_mactable_entries.
+ * This is the structure of a single mactable entry
+ */
+struct hfi_veswport_mactable_entry {
+	u8                      mac_addr[ETH_ALEN];
+	u8                      mac_addr_mask[ETH_ALEN];
+	union hfi_vnic_dlid_sd  dlid_sd;
+} __packed;
+
+/**
+ * struct hfi_veswport_mactable - Forwarding table array
+ * @offset: mac table starting offset
+ * @num_entries: Number of entries to get or set
+ * @mac_tbl_digest: mac table digest
+ * @tbl_entries[]: Array of table entries
+ *
+ * The EM sends down this structure in a MAD indicating
+ * the starting offset in the forwarding table that this
+ * entry is to be loaded into and the number of entries
+ * that that this MAD instance contains
+ * The mac_tbl_digest has been added to this MAD structure. It will be set by
+ * the EM and it will be used by the EM to check if there are any
+ * discrepancies with this value and the value
+ * maintained by the EM in the case of VNIC device being deleted or unloaded
+ * A new instantiation of a VNIC will always have a value of zero.
+ * This value is stored as part of the vnic adapter structure and will be
+ * accessed by the GET and SET routines for both the mactable entries and the
+ * veswport info.
+ */
+struct hfi_veswport_mactable {
+	__be16                              offset;
+	__be16                              num_entries;
+	__be32                              mac_tbl_digest;
+	struct hfi_veswport_mactable_entry  tbl_entries[0];
+} __packed;
+
+/**
+ * struct hfi_veswport_summary_counters - summary counters
+ * @vp_instance: vport instance on the HFI port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_packets: transmit packets
+ * @rx_packets: receive packets
+ * @tx_bytes: transmit bytes
+ * @rx_bytes: receive bytes
+ * @tx_unicast: unicast packets transmitted
+ * @tx_mcastbcast: multicast/broadcast packets transmitted
+ * @tx_untagged: non-vlan packets transmitted
+ * @tx_vlan: vlan packets transmitted
+ * @tx_64_size: transmit packet length is 64 bytes
+ * @tx_65_127: transmit packet length is >=65 and < 127 bytes
+ * @tx_128_255: transmit packet length is >=128 and < 255 bytes
+ * @tx_256_511: transmit packet length is >=256 and < 511 bytes
+ * @tx_512_1023: transmit packet length is >=512 and < 1023 bytes
+ * @tx_1024_1518: transmit packet length is >=1024 and < 1518 bytes
+ * @tx_1519_max: transmit packet length >= 1519 bytes
+ * @rx_unicast: unicast packets received
+ * @rx_mcastbcast: multicast/broadcast packets received
+ * @rx_untagged: non-vlan packets received
+ * @rx_vlan: vlan packets received
+ * @rx_64_size: received packet length is 64 bytes
+ * @rx_65_127: received packet length is >=65 and < 127 bytes
+ * @rx_128_255: received packet length is >=128 and < 255 bytes
+ * @rx_256_511: received packet length is >=256 and < 511 bytes
+ * @rx_512_1023: received packet length is >=512 and < 1023 bytes
+ * @rx_1024_1518: received packet length is >=1024 and < 1518 bytes
+ * @rx_1519_max: received packet length >= 1519 bytes
+ *
+ * All the above are counters of corresponding conditions.
+ */
+struct hfi_veswport_summary_counters {
+	__be16  vp_instance;
+	__be16  vesw_id;
+	__be32  veswport_num;
+
+	__be64  tx_errors;
+	__be64  rx_errors;
+	__be64  tx_packets;
+	__be64  rx_packets;
+	__be64  tx_bytes;
+	__be64  rx_bytes;
+
+	__be64  tx_unicast;
+	__be64  tx_mcastbcast;
+
+	__be64  tx_untagged;
+	__be64  tx_vlan;
+
+	__be64  tx_64_size;
+	__be64  tx_65_127;
+	__be64  tx_128_255;
+	__be64  tx_256_511;
+	__be64  tx_512_1023;
+	__be64  tx_1024_1518;
+	__be64  tx_1519_max;
+
+	__be64  rx_unicast;
+	__be64  rx_mcastbcast;
+
+	__be64  rx_untagged;
+	__be64  rx_vlan;
+
+	__be64  rx_64_size;
+	__be64  rx_65_127;
+	__be64  rx_128_255;
+	__be64  rx_256_511;
+	__be64  rx_512_1023;
+	__be64  rx_1024_1518;
+	__be64  rx_1519_max;
+
+	__be64  reserved[16];
+} __packed;
+
+/**
+ * struct hfi_veswport_error_counters - error counters
+ * @vp_instance: vport instance on the HFI port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_smac_filt: smac filter errors
+ * @tx_dlid_zero: transmit packets with invalid dlid
+ * @tx_logic: other transmit errors
+ * @tx_drop_state: packet tansmission in non-forward port state
+ * @rx_bad_veswid: received packet with invalid vesw id
+ * @rx_runt: received ethernet packet with length < 64 bytes
+ * @rx_oversize: received ethernet packet with length > MTU size
+ * @rx_eth_down: received packets when interface is down
+ * @rx_drop_state: received packets in non-forwarding port state
+ * @rx_logic: other receive errors
+ *
+ * All the above are counters of corresponding erorr conditions.
+ */
+struct hfi_veswport_error_counters {
+	__be16  vp_instance;
+	__be16  vesw_id;
+	__be32  veswport_num;
+
+	__be64  tx_errors;
+	__be64  rx_errors;
+
+	__be64  rsvd0;
+	__be64  tx_smac_filt;
+	__be64  rsvd1;
+	__be64  rsvd2;
+	__be64  rsvd3;
+	__be64  tx_dlid_zero;
+	__be64  rsvd4;
+	__be64  tx_logic;
+	__be64  rsvd5;
+	__be64  tx_drop_state;
+
+	__be64  rx_bad_veswid;
+	__be64  rsvd6;
+	__be64  rx_runt;
+	__be64  rx_oversize;
+	__be64  rsvd7;
+	__be64  rx_eth_down;
+	__be64  rx_drop_state;
+	__be64  rx_logic;
+	__be64  rsvd8;
+
+	__be64  rsvd9[16];
+} __packed;
+
+/**
+ * struct hfi_veswport_trap - Trap message sent to EM by VNIC
+ * @fabric_id: 10 bit fabric id
+ * @veswid: 12 bit virtual ethernet switch id
+ * @veswportnum: logical port number on the Virtual switch
+ * @hfiportnum: physical port num (redundant on host)
+ * @veswportindex: switch port index on hfi port 0 based
+ * @opcode: operation
+ * @reserved: 32 bit for alignment
+ *
+ * The VNIC will send trap messages to the Ethernet manager to
+ * inform it about changes to the VNIC config, behaviour etc.
+ * This is the format of the trap payload.
+ */
+struct hfi_veswport_trap {
+	__be16  fabric_id;
+	__be16  veswid;
+	__be32  veswportnum;
+	__be16  hfiportnum;
+	u8      veswportindex;
+	u8      opcode;
+	__be32  reserved;
+} __packed;
+
+/**
+ * struct hfi_vnic_iface_macs_entry - single entry in the mac list
+ * @mac_addr: MAC address
+ */
+struct hfi_vnic_iface_mac_entry {
+	u8 mac_addr[ETH_ALEN];
+};
+
+/**
+ * struct hfi_veswport_iface_macs - Msg to set globally administered MAC
+ * @start_idx: position of first entry (0 based)
+ * @num_macs_in_msg: number of MACs in this message
+ * @tot_macs_in_lst: The total number of MACs the agent has
+ * @gen_count: gen_count to indicate change
+ * @entry: The mac list entry
+ *
+ * Same attribute IDS and attribute modifiers as in locally administered
+ * addresses used to set globally administered addresses
+ */
+struct hfi_veswport_iface_macs {
+	__be16 start_idx;
+	__be16 num_macs_in_msg;
+	__be16 tot_macs_in_lst;
+	__be16 gen_count;
+	struct hfi_vnic_iface_mac_entry entry[0];
+} __packed;
+
+/**
+ * struct hfi_vnic_vema_mad - Generic VEMA MAD
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @data: MAD data
+ */
+struct hfi_vnic_vema_mad {
+	struct ib_mad_hdr  mad_hdr;
+	struct ib_rmpp_hdr rmpp_hdr;
+	u8                 reserved;
+	u8                 oui[3];
+	u8                 data[HFI_VNIC_EMA_DATA];
+};
+
+/**
+ * struct hfi_vnic_notice_attr - Generic Notice MAD
+ * @gen_type: Generic/Specific bit and type of notice
+ * @oui_1: Vendor ID byte 1
+ * @oui_2: Vendor ID byte 2
+ * @oui_3: Vendor ID byte 3
+ * @trap_num: Trap number
+ * @toggle_count: Notice toggle bit and count value
+ * @issuer_lid: Trap issuer's lid
+ * @issuer_gid: Issuer GID (only if Report method)
+ * @raw_data: Trap message body
+ */
+struct hfi_vnic_notice_attr {
+	u8     gen_type;
+	u8     oui_1;
+	u8     oui_2;
+	u8     oui_3;
+	__be16 trap_num;
+	__be16 toggle_count;
+	__be32 issuer_lid;
+	__be32 reserved;
+	u8     issuer_gid[16];
+	u8     raw_data[64];
+} __packed;
+
+/**
+ * struct hfi_vnic_vema_mad_trap - Generic VEMA MAD Trap
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @notice: Notice structure
+ */
+struct hfi_vnic_vema_mad_trap {
+	struct ib_mad_hdr            mad_hdr;
+	struct ib_rmpp_hdr           rmpp_hdr;
+	u8                           reserved;
+	u8                           oui[3];
+	struct hfi_vnic_notice_attr  notice;
+};
+
 #endif /* _HFI_VNIC_ENCAP_H */
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
index 8a5e0c1..4dbb117 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
@@ -95,13 +95,15 @@ enum hfi_vnic_flags_t {
 
 /**
  * struct __hfi_vesw_info - HFI vnic virtual switch info
+ *
+ * Same as hfi_vesw_info without bitwise attribute.
  */
 struct __hfi_vesw_info {
 	u16  fabric_id;
 	u16  vesw_id;
 
 	u8   rsvd0[6];
-	u16  def_fw_id_mask;
+	u16  def_port_mask;
 
 	u8   rsvd1[2];
 	u16  pkey;
@@ -118,6 +120,8 @@ struct __hfi_vesw_info {
 
 /**
  * struct __hfi_per_veswport_info - HFI vnic per port info
+ *
+ * Same as hfi_per_veswport_info without bitwise attribute.
  */
 struct __hfi_per_veswport_info {
 	u32  port_num;
@@ -156,6 +160,8 @@ struct __hfi_per_veswport_info {
 
 /**
  * struct __hfi_veswport_info - HFI vnic port info
+ *
+ * Same as hfi_veswport_info without bitwise attribute.
  */
 struct __hfi_veswport_info {
 	struct __hfi_vesw_info            vesw;
@@ -163,6 +169,21 @@ struct __hfi_veswport_info {
 };
 
 /**
+ * struct __hfi_veswport_trap - HFI vnic trap info
+ *
+ * Same as hfi_veswport_trap without bitwise attribute.
+ */
+struct __hfi_veswport_trap {
+	u16	fabric_id;
+	u16	veswid;
+	u32	veswportnum;
+	u16	hfiportnum;
+	u8	veswportindex;
+	u8	opcode;
+	u32	reserved;
+} __packed;
+
+/**
  * struct hfi_vnic_rx_queue - HFI VNIC receive queue
  * @idx: queue index
  * @adapter: netdev adapter
@@ -210,6 +231,18 @@ struct hfi_vnic_adapter {
 #define v_notice(format, arg...) \
 	netdev_notice(adapter->netdev, format, ## arg)
 
+/* The maximum allowed entries in the mac table */
+#define HFI_VNIC_MAC_TBL_MAX_ENTRIES  2048
+/* Limit of smac entries in mac table */
+#define HFI_VNIC_MAX_SMAC_LIMIT       256
+
+/* The last octet of the MAC address is used as the key to the hash table */
+#define HFI_VNIC_MAC_HASH_IDX         5
+
+/* The VNIC MAC hash table is of size 2^8 */
+#define HFI_VNIC_MAC_TBL_HASH_BITS    8
+#define HFI_VNIC_MAC_TBL_SIZE  BIT(HFI_VNIC_MAC_TBL_HASH_BITS)
+
 extern char hfi_vnic_driver_name[];
 extern const char hfi_vnic_driver_version[];
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [RFC 07/10] IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) interface
From: Vishwanathapura, Niranjana @ 2016-11-18 22:42 UTC (permalink / raw)
  To: Doug Ledford
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Dennis Dalessandro, Niranjana Vishwanathapura, Sadanand Warrier,
	Tanya K Jajodia
In-Reply-To: <1479508938-63799-1-git-send-email-niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

HFI VNIC EMA interface functions are the management interfaces to the HFI
VNIC netdev driver. Implement the required GET/SET management interface
functions and processing of new management information. Add support to
send trap notifications upon various events like interface status change,
unicast/multicast mac list update and mac address change.

Change-Id: I18ccdc0a898ecd7ddcaca795f0a3d205c24b7e6b
Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Sadanand Warrier <sadanand.warrier-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Tanya K Jajodia <tanya.k.jajodia-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile |   3 +-
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h        |   4 +
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h     |  24 ++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c       | 159 ++++++++-
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c   | 385 +++++++++++++++++++++
 5 files changed, 572 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c

diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
index a05b2f5..375cd09 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
@@ -4,4 +4,5 @@
 ccflags-y += -I$(src)/../include
 obj-$(CONFIG_HFI_VNIC) += hfi_vnic.o
 
-hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o
+hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o \
+              hfi_vnic_vema_iface.o
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
index 9ed5221..4e6f367 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_encap.h
@@ -99,6 +99,10 @@
 #define HFI_VNIC_STATE_DROP_ALL        0x1
 #define HFI_VNIC_STATE_FORWARDING      0x3
 
+/* VNIC Ethernet link status */
+#define HFI_VNIC_ETH_LINK_UP     1
+#define HFI_VNIC_ETH_LINK_DOWN   2
+
 /**
  * struct hfi_vesw_info - HFI vnic switch information
  * @fabric_id: 10-bit fabric id
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
index 21a43f6..8ebed89 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
@@ -261,6 +261,9 @@ struct hfi_vnic_rx_queue {
  * @lock: adapter lock
  * @rxq: receive queue array
  * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
  * @mactbl: hash table of MAC entries
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
@@ -286,6 +289,9 @@ struct hfi_vnic_adapter {
 	struct hfi_vnic_rx_queue  rxq[HFI_VNIC_MAX_QUEUE];
 
 	struct __hfi_veswport_info  info;
+	u8                          vema_mac_addr[ETH_ALEN];
+	u32                         umac_hash;
+	u32                         mmac_hash;
 	struct hlist_head  __rcu   *mactbl;
 
 	/* Lock used to protect updates to mac table */
@@ -378,12 +384,30 @@ struct hfi_vnic_mac_tbl_node {
 int hfi_vnic_encap_skb(struct hfi_vnic_adapter *adapter, struct sk_buff *skb);
 int hfi_vnic_decap_skb(struct hfi_vnic_rx_queue *rxq, struct sk_buff *skb);
 u8 hfi_vnic_calc_entropy(struct hfi_vnic_adapter *adapter, struct sk_buff *skb);
+void hfi_vnic_process_vema_config(struct hfi_vnic_adapter *adapter);
 void hfi_vnic_release_mac_tbl(struct hfi_vnic_adapter *adapter);
 void hfi_vnic_query_mac_tbl(struct hfi_vnic_adapter *adapter,
 			    struct hfi_veswport_mactable *tbl);
 int hfi_vnic_update_mac_tbl(struct hfi_vnic_adapter *adapter,
 			    struct hfi_veswport_mactable *tbl);
+void hfi_vnic_query_ucast_macs(struct hfi_vnic_adapter *adapter,
+			       struct hfi_veswport_iface_macs *macs);
+void hfi_vnic_query_mcast_macs(struct hfi_vnic_adapter *adapter,
+			       struct hfi_veswport_iface_macs *macs);
 void hfi_vnic_update_stats(struct net_device *netdev);
+void hfi_vnic_get_summary_counters(struct hfi_vnic_adapter *adapter,
+				   struct hfi_veswport_summary_counters *cntrs);
+void hfi_vnic_get_error_counters(struct hfi_vnic_adapter *adapter,
+				 struct hfi_veswport_error_counters *cntrs);
+void hfi_vnic_get_vesw_info(struct hfi_vnic_adapter *adapter,
+			    struct hfi_vesw_info *info);
+void hfi_vnic_set_vesw_info(struct hfi_vnic_adapter *adapter,
+			    struct hfi_vesw_info *info);
+void hfi_vnic_get_per_veswport_info(struct hfi_vnic_adapter *adapter,
+				    struct hfi_per_veswport_info *info);
+void hfi_vnic_set_per_veswport_info(struct hfi_vnic_adapter *adapter,
+				    struct hfi_per_veswport_info *info);
+void hfi_vnic_vema_report_event(struct hfi_vnic_adapter *adapter, u8 event);
 void hfi_vnic_set_ethtool_ops(struct net_device *ndev);
 
 #endif /* _HFI_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
index ee18610..75a3fd2 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
@@ -51,6 +51,7 @@
 
 #include <linux/module.h>
 #include <linux/if_vlan.h>
+#include <linux/crc32.h>
 
 #include "hfi_vnic.h"
 #include "hfi_vnic_internal.h"
@@ -533,7 +534,96 @@ static void hfi_vnic_down(struct hfi_vnic_adapter *adapter)
 	clear_bit(HFI_VNIC_UP, &adapter->flags);
 }
 
-/* hfi_vnic_set_mac_addr - change mac address */
+/* hfi_vnic_process_vema_config - process vema configuration updates */
+void hfi_vnic_process_vema_config(struct hfi_vnic_adapter *adapter)
+{
+	struct __hfi_veswport_info *info = &adapter->info;
+	struct hfi_vnic_device *vdev = adapter->vdev;
+	u8 port_num[HFI_VESW_MAX_NUM_DEF_PORT] = { 0 };
+	u16 port_mask, mtu_limit = ETH_ZLEN - ETH_HLEN;
+	u8 i, port_count = 0;
+
+	/*
+	 * If the base_mac_addr is changed, update the interface mac address.
+	 * If the mac address is rejected, do not accept new base_mac_addr.
+	 */
+	if (memcmp(info->vport.base_mac_addr, adapter->vema_mac_addr,
+		   ARRAY_SIZE(info->vport.base_mac_addr))) {
+		struct sockaddr saddr;
+
+		memcpy(saddr.sa_data, info->vport.base_mac_addr,
+		       ARRAY_SIZE(info->vport.base_mac_addr));
+		mutex_lock(&adapter->lock);
+		if (!eth_mac_addr(adapter->netdev, &saddr))
+			memcpy(adapter->vema_mac_addr,
+			       info->vport.base_mac_addr, ETH_ALEN);
+		else
+			memcpy(info->vport.base_mac_addr,
+			       adapter->vema_mac_addr, ETH_ALEN);
+		mutex_unlock(&adapter->lock);
+	}
+
+	/*
+	 * If vesw_id is being changed, and if the vnic interface
+	 * is up, reset the hfi interface to ensure new vesw_id
+	 * is picked by hfi driver
+	 */
+	if (vdev->vesw_id != info->vesw.vesw_id) {
+		mutex_lock(&adapter->lock);
+		if (test_bit(HFI_VNIC_UP, &adapter->flags))
+			hfi_vnic_down(adapter);
+
+		vdev->vesw_id = info->vesw.vesw_id;
+		if (test_bit(HFI_VNIC_OPEN, &adapter->flags))
+			hfi_vnic_up(adapter);
+
+		mutex_unlock(&adapter->lock);
+	}
+
+	/* Handle MTU limit change */
+	mtu_limit = max(info->vesw.eth_mtu_non_vlan, mtu_limit);
+	rtnl_lock();
+	if (adapter->netdev->mtu > mtu_limit)
+		dev_set_mtu(adapter->netdev, mtu_limit);
+	rtnl_unlock();
+
+	/* Update flow to default port redirection table */
+	port_mask = info->vesw.def_port_mask;
+	for (i = 0; i < HFI_VESW_MAX_NUM_DEF_PORT; i++) {
+		if (port_mask & 1)
+			port_num[port_count++] = i;
+		port_mask >>= 1;
+	}
+
+	/*
+	 * Build the flow table. Flow table is required when destination LID
+	 * is not available. Up to HFI_VNIC_FLOW_TBL_SIZE flows supported.
+	 * Each flow need a default port number to get its dlid from the
+	 * u_ucast_dlid array.
+	 */
+	for (i = 0; i < HFI_VNIC_FLOW_TBL_SIZE; i++)
+		adapter->flow_tbl[i] = port_count ? port_num[i % port_count] :
+						    HFI_VNIC_INVALID_PORT;
+
+	/* Operational state can only be DROP_ALL or FORWARDING */
+	if (info->vport.config_state == HFI_VNIC_STATE_FORWARDING)
+		info->vport.oper_state = HFI_VNIC_STATE_FORWARDING;
+	else
+		info->vport.oper_state = HFI_VNIC_STATE_DROP_ALL;
+}
+
+/*
+ * Set the power on default values in adapter's vema interface structure.
+ */
+static inline void hfi_vnic_set_pod_values(struct hfi_vnic_adapter *adapter)
+{
+	adapter->info.vport.max_mac_tbl_ent = HFI_VNIC_MAC_TBL_MAX_ENTRIES;
+	adapter->info.vport.max_smac_ent = HFI_VNIC_MAX_SMAC_LIMIT;
+	adapter->info.vport.config_state = HFI_VNIC_STATE_DROP_ALL;
+	adapter->info.vport.eth_link_status = HFI_VNIC_ETH_LINK_DOWN;
+}
+
+/* hfi_vnic_set_mac_addr - change mac address and send trap */
 static int hfi_vnic_set_mac_addr(struct net_device *netdev, void *addr)
 {
 	struct hfi_vnic_adapter *adapter = netdev_priv(netdev);
@@ -542,8 +632,62 @@ static int hfi_vnic_set_mac_addr(struct net_device *netdev, void *addr)
 	mutex_lock(&adapter->lock);
 	rc = eth_mac_addr(netdev, addr);
 	mutex_unlock(&adapter->lock);
+	if (rc)
+		return rc;
 
-	return rc;
+	adapter->info.vport.uc_macs_gen_count++;
+	hfi_vnic_vema_report_event(adapter,
+				   HFI_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+	return 0;
+}
+
+/*
+ * hfi_vnic_mac_send_event - post event on possible mac list exchange
+ *  Send trap when digest from uc/mc mac list differs from previous run.
+ *  Digest is evaluated similar to how cksum does.
+ */
+static void hfi_vnic_mac_send_event(struct net_device *netdev, u8 event)
+{
+	struct hfi_vnic_adapter *adapter = netdev_priv(netdev);
+	struct netdev_hw_addr *ha;
+	struct netdev_hw_addr_list *hw_list;
+	u32 *ref_crc;
+	u32 l, crc = 0;
+
+	switch (event) {
+	case HFI_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE:
+		hw_list = &netdev->uc;
+		adapter->info.vport.uc_macs_gen_count++;
+		ref_crc = &adapter->umac_hash;
+		break;
+	case HFI_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE:
+		hw_list = &netdev->mc;
+		adapter->info.vport.mc_macs_gen_count++;
+		ref_crc = &adapter->mmac_hash;
+		break;
+	default:
+		return;
+	}
+	netdev_hw_addr_list_for_each(ha, hw_list) {
+		crc = crc32_le(crc, ha->addr, ETH_ALEN);
+	}
+	l = netdev_hw_addr_list_count(hw_list) * ETH_ALEN;
+	crc = ~crc32_le(crc, (void *)&l, sizeof(l));
+
+	if (crc != *ref_crc) {
+		*ref_crc = crc;
+		hfi_vnic_vema_report_event(adapter, event);
+	}
+}
+
+/* hfi_vnic_set_rx_mode - handle uc/mc mac list change */
+static void hfi_vnic_set_rx_mode(struct net_device *netdev)
+{
+	hfi_vnic_mac_send_event(netdev,
+				HFI_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+
+	hfi_vnic_mac_send_event(netdev,
+				HFI_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE);
 }
 
 /* hfi_netdev_open - activate network interface */
@@ -557,6 +701,10 @@ static int hfi_netdev_open(struct net_device *netdev)
 	if (rc)
 		goto open_done;
 
+	/* Update eth link status and send trap */
+	adapter->info.vport.eth_link_status = HFI_VNIC_ETH_LINK_UP;
+	hfi_vnic_vema_report_event(adapter,
+				   HFI_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
 	set_bit(HFI_VNIC_OPEN, &adapter->flags);
 	v_info("opened\n");
 open_done:
@@ -573,6 +721,10 @@ static int hfi_netdev_close(struct net_device *netdev)
 	if (test_bit(HFI_VNIC_UP, &adapter->flags))
 		hfi_vnic_down(adapter);
 
+	/* Update eth link status and send trap */
+	adapter->info.vport.eth_link_status = HFI_VNIC_ETH_LINK_DOWN;
+	hfi_vnic_vema_report_event(adapter,
+				   HFI_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
 	clear_bit(HFI_VNIC_OPEN, &adapter->flags);
 	mutex_unlock(&adapter->lock);
 	v_info("closed\n");
@@ -586,6 +738,7 @@ static int hfi_netdev_close(struct net_device *netdev)
 	.ndo_start_xmit = hfi_netdev_start_xmit,
 	.ndo_change_mtu = hfi_netdev_change_mtu,
 	.ndo_get_stats64 = hfi_vnic_get_stats64,
+	.ndo_set_rx_mode = hfi_vnic_set_rx_mode,
 	.ndo_select_queue = hfi_vnic_select_queue,
 	.ndo_set_mac_address = hfi_vnic_set_mac_addr,
 };
@@ -636,6 +789,8 @@ static int hfi_vnic_drv_probe(struct device *dev)
 		netif_napi_add(netdev, &adapter->rxq[i].napi, vnic_napi, 64);
 	}
 
+	hfi_vnic_set_pod_values(adapter);
+
 	rc = vdev->bus_ops->init(vdev);
 	if (rc)
 		goto hw_err;
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c
new file mode 100644
index 0000000..4a87826
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI VNIC EMA Interface functions.
+ */
+
+#include "hfi_vnic.h"
+#include "hfi_vnic_internal.h"
+
+/**
+ * hfi_vnic_vema_report_event - sent trap to report the specified event
+ * @adapter: vnic port adapter
+ * @event: event to be reported
+ *
+ * This function calls vema api to sent a trap for the given event.
+ */
+void hfi_vnic_vema_report_event(struct hfi_vnic_adapter *adapter, u8 event)
+{
+	struct __hfi_veswport_info *info = &adapter->info;
+	struct hfi_vnic_device *vdev = adapter->vdev;
+	struct __hfi_veswport_trap trap_data;
+
+	trap_data.fabric_id = info->vesw.fabric_id;
+	trap_data.veswid = info->vesw.vesw_id;
+	trap_data.veswportnum = info->vport.port_num;
+	trap_data.hfiportnum = vdev->port_num;
+	trap_data.veswportindex = vdev->vport_num;
+	trap_data.opcode = event;
+
+	/* Need to send trap here */
+}
+
+/**
+ * hfi_vnic_get_error_counters - get summary counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination summary counters structure
+ *
+ * This function populates the summary counters that is maintained by the
+ * given adapter to destination address provided.
+ */
+void hfi_vnic_get_summary_counters(struct hfi_vnic_adapter *adapter,
+				   struct hfi_veswport_summary_counters *cntrs)
+{
+	__be64 *dst;
+	u64 *src;
+
+	mutex_lock(&adapter->stats_lock);
+	/* update stats */
+	hfi_vnic_update_stats(adapter->netdev);
+
+	cntrs->vp_instance = cpu_to_be16(adapter->vdev->vport_num);
+	cntrs->vesw_id = cpu_to_be16(adapter->vdev->vesw_id);
+	cntrs->veswport_num = cpu_to_be32(adapter->info.vport.port_num);
+
+	/*
+	 * This loop depends on layout of
+	 * struct hfi_veswport_summary_counters and
+	 * struct __hfi_vnic_summary_counter
+	 */
+	for (dst = &cntrs->tx_errors, src = &adapter->sum_cntrs.tx_errors;
+	     dst < &cntrs->reserved[0]; dst++, src++) {
+		*dst = cpu_to_be64(*src);
+	}
+
+	mutex_unlock(&adapter->stats_lock);
+}
+
+/**
+ * hfi_vnic_get_error_counters - get error counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination error counters structure
+ *
+ * This function populates the error counters that is maintained by the
+ * given adapter to destination address provided.
+ */
+void hfi_vnic_get_error_counters(struct hfi_vnic_adapter *adapter,
+				 struct hfi_veswport_error_counters *cntrs)
+{
+	mutex_lock(&adapter->stats_lock);
+	/* update stats */
+	hfi_vnic_update_stats(adapter->netdev);
+
+	cntrs->vp_instance = cpu_to_be16(adapter->vdev->vport_num);
+	cntrs->vesw_id = cpu_to_be16(adapter->vdev->vesw_id);
+	cntrs->veswport_num = cpu_to_be32(adapter->info.vport.port_num);
+
+	cntrs->tx_errors = cpu_to_be64(adapter->err_cntrs.tx_errors);
+	cntrs->rx_errors = cpu_to_be64(adapter->err_cntrs.rx_errors);
+	cntrs->tx_smac_filt = cpu_to_be64(adapter->err_cntrs.tx_smac_filt);
+	cntrs->tx_dlid_zero = cpu_to_be64(adapter->err_cntrs.tx_dlid_zero);
+	cntrs->tx_logic = cpu_to_be64(adapter->err_cntrs.tx_logic);
+	cntrs->tx_drop_state = cpu_to_be64(adapter->err_cntrs.tx_drop_state);
+
+	cntrs->rx_bad_veswid = cpu_to_be64(adapter->err_cntrs.rx_bad_veswid);
+	cntrs->rx_runt = cpu_to_be64(adapter->err_cntrs.rx_runt);
+	cntrs->rx_oversize = cpu_to_be64(adapter->err_cntrs.rx_oversize);
+	cntrs->rx_eth_down = cpu_to_be64(adapter->err_cntrs.rx_eth_down);
+	cntrs->rx_drop_state = cpu_to_be64(adapter->err_cntrs.rx_drop_state);
+	cntrs->rx_logic = cpu_to_be64(adapter->err_cntrs.rx_logic);
+	mutex_unlock(&adapter->stats_lock);
+}
+
+/**
+ * hfi_vnic_get_vesw_info -- Get the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vesw info structure
+ *
+ * This function copies the vesw info that is maintained by the
+ * given adapter to destination address provided.
+ */
+void hfi_vnic_get_vesw_info(struct hfi_vnic_adapter *adapter,
+			    struct hfi_vesw_info *info)
+{
+	struct __hfi_vesw_info *src = &adapter->info.vesw;
+	int i;
+
+	info->fabric_id = cpu_to_be16(src->fabric_id);
+	info->vesw_id = cpu_to_be16(src->vesw_id);
+	memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+	info->def_port_mask = cpu_to_be16(src->def_port_mask);
+	memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+	info->pkey = cpu_to_be16(src->pkey);
+
+	memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+	info->u_mcast_dlid = cpu_to_be32(src->u_mcast_dlid);
+	for (i = 0; i < HFI_VESW_MAX_NUM_DEF_PORT; i++)
+		info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]);
+
+	memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+	for (i = 0; i < HFI_VNIC_MAX_NUM_PCP; i++)
+		info->eth_mtu[i] = cpu_to_be16(src->eth_mtu[i]);
+
+	info->eth_mtu_non_vlan = cpu_to_be16(src->eth_mtu_non_vlan);
+	memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4));
+}
+
+/**
+ * hfi_vnic_set_vesw_info -- Set the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to vesw info structure
+ *
+ * This function updates the vesw info that is maintained by the
+ * given adapter with vesw info provided. Reserved fields are stored
+ * and returned back to EM as is.
+ */
+void hfi_vnic_set_vesw_info(struct hfi_vnic_adapter *adapter,
+			    struct hfi_vesw_info *info)
+{
+	struct __hfi_vesw_info *dst = &adapter->info.vesw;
+	int i;
+
+	dst->fabric_id = be16_to_cpu(info->fabric_id);
+	dst->vesw_id = be16_to_cpu(info->vesw_id);
+	memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+	dst->def_port_mask = be16_to_cpu(info->def_port_mask);
+	memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+	dst->pkey = be16_to_cpu(info->pkey);
+
+	memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+	dst->u_mcast_dlid = be32_to_cpu(info->u_mcast_dlid);
+	for (i = 0; i < HFI_VESW_MAX_NUM_DEF_PORT; i++)
+		dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]);
+
+	memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+	for (i = 0; i < HFI_VNIC_MAX_NUM_PCP; i++)
+		dst->eth_mtu[i] = be16_to_cpu(info->eth_mtu[i]);
+
+	dst->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan);
+	memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4));
+}
+
+/**
+ * hfi_vnic_get_per_veswport_info -- Get the vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vport info structure
+ *
+ * This function copies the vesw per port info that is maintained by the
+ * given adapter to destination address provided.
+ * Note that the read only fields are not copied.
+ */
+void hfi_vnic_get_per_veswport_info(struct hfi_vnic_adapter *adapter,
+				    struct hfi_per_veswport_info *info)
+{
+	struct __hfi_per_veswport_info *src = &adapter->info.vport;
+
+	info->port_num = cpu_to_be32(src->port_num);
+	info->eth_link_status = src->eth_link_status;
+	memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+
+	memcpy(info->base_mac_addr, src->base_mac_addr,
+	       ARRAY_SIZE(info->base_mac_addr));
+	info->config_state = src->config_state;
+	info->oper_state = src->oper_state;
+	info->max_mac_tbl_ent = cpu_to_be16(src->max_mac_tbl_ent);
+	info->max_smac_ent = cpu_to_be16(src->max_smac_ent);
+	info->mac_tbl_digest = cpu_to_be32(src->mac_tbl_digest);
+	memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+
+	info->encap_slid = cpu_to_be32(src->encap_slid);
+	memcpy(info->pcp_to_sc_uc, src->pcp_to_sc_uc,
+	       ARRAY_SIZE(info->pcp_to_sc_uc));
+	memcpy(info->pcp_to_vl_uc, src->pcp_to_vl_uc,
+	       ARRAY_SIZE(info->pcp_to_vl_uc));
+	memcpy(info->pcp_to_sc_mc, src->pcp_to_sc_mc,
+	       ARRAY_SIZE(info->pcp_to_sc_mc));
+	memcpy(info->pcp_to_vl_mc, src->pcp_to_vl_mc,
+	       ARRAY_SIZE(info->pcp_to_vl_mc));
+	info->non_vlan_sc_uc = src->non_vlan_sc_uc;
+	info->non_vlan_vl_uc = src->non_vlan_vl_uc;
+	info->non_vlan_sc_mc = src->non_vlan_sc_mc;
+	info->non_vlan_vl_mc = src->non_vlan_vl_mc;
+	memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+
+	info->uc_macs_gen_count = cpu_to_be16(src->uc_macs_gen_count);
+	info->mc_macs_gen_count = cpu_to_be16(src->mc_macs_gen_count);
+	memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+}
+
+/**
+ * hfi_vnic_set_per_veswport_info -- Set vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to vport info structure
+ *
+ * This function updates the vesw per port info that is maintained by the
+ * given adapter with vesw per port info provided. Reserved fields are
+ * stored and returned back to EM as is.
+ */
+void hfi_vnic_set_per_veswport_info(struct hfi_vnic_adapter *adapter,
+				    struct hfi_per_veswport_info *info)
+{
+	struct __hfi_per_veswport_info *dst = &adapter->info.vport;
+
+	dst->port_num = be32_to_cpu(info->port_num);
+	memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+
+	memcpy(dst->base_mac_addr, info->base_mac_addr,
+	       ARRAY_SIZE(dst->base_mac_addr));
+	dst->config_state = info->config_state;
+	memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+
+	dst->encap_slid = be32_to_cpu(info->encap_slid);
+	memcpy(dst->pcp_to_sc_uc, info->pcp_to_sc_uc,
+	       ARRAY_SIZE(dst->pcp_to_sc_uc));
+	memcpy(dst->pcp_to_vl_uc, info->pcp_to_vl_uc,
+	       ARRAY_SIZE(dst->pcp_to_vl_uc));
+	memcpy(dst->pcp_to_sc_mc, info->pcp_to_sc_mc,
+	       ARRAY_SIZE(dst->pcp_to_sc_mc));
+	memcpy(dst->pcp_to_vl_mc, info->pcp_to_vl_mc,
+	       ARRAY_SIZE(dst->pcp_to_vl_mc));
+	dst->non_vlan_sc_uc = info->non_vlan_sc_uc;
+	dst->non_vlan_vl_uc = info->non_vlan_vl_uc;
+	dst->non_vlan_sc_mc = info->non_vlan_sc_mc;
+	dst->non_vlan_vl_mc = info->non_vlan_vl_mc;
+	memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+	memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+}
+
+/**
+ * hfi_vnic_query_mcast_macs - query multicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer mac list
+ *
+ * This function populates the provided mac list with the configured
+ * multicast addresses in the adapter.
+ */
+void hfi_vnic_query_mcast_macs(struct hfi_vnic_adapter *adapter,
+			       struct hfi_veswport_iface_macs *macs)
+{
+	u16 start_idx, num_macs, idx = 0, count = 0;
+	struct netdev_hw_addr *ha;
+
+	start_idx = be16_to_cpu(macs->start_idx);
+	num_macs = be16_to_cpu(macs->num_macs_in_msg);
+	netdev_for_each_mc_addr(ha, adapter->netdev) {
+		struct hfi_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+		if (start_idx > idx++)
+			continue;
+		else if (num_macs == count)
+			break;
+		memcpy(entry, ha->addr, sizeof(*entry));
+		count++;
+	}
+
+	macs->tot_macs_in_lst = cpu_to_be16(netdev_mc_count(adapter->netdev));
+	macs->num_macs_in_msg = cpu_to_be16(count);
+	macs->gen_count = cpu_to_be16(adapter->info.vport.mc_macs_gen_count);
+}
+
+/**
+ * hfi_vnic_query_ucast_macs - query unicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer mac list
+ *
+ * This function populates the provided mac list with the configured
+ * unicast addresses in the adapter.
+ */
+void hfi_vnic_query_ucast_macs(struct hfi_vnic_adapter *adapter,
+			       struct hfi_veswport_iface_macs *macs)
+{
+	u16 start_idx, tot_macs, num_macs, idx = 0, count = 0;
+	struct netdev_hw_addr *ha;
+
+	start_idx = be16_to_cpu(macs->start_idx);
+	num_macs = be16_to_cpu(macs->num_macs_in_msg);
+	/* loop through dev_addrs list first */
+	for_each_dev_addr(adapter->netdev, ha) {
+		struct hfi_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+		/* Do not include EM specified MAC address */
+		if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr,
+			    ARRAY_SIZE(adapter->info.vport.base_mac_addr)))
+			continue;
+
+		if (start_idx > idx++)
+			continue;
+		else if (num_macs == count)
+			break;
+		memcpy(entry, ha->addr, sizeof(*entry));
+		count++;
+	}
+
+	/* loop through uc list */
+	netdev_for_each_uc_addr(ha, adapter->netdev) {
+		struct hfi_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+		if (start_idx > idx++)
+			continue;
+		else if (num_macs == count)
+			break;
+		memcpy(entry, ha->addr, sizeof(*entry));
+		count++;
+	}
+
+	tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) +
+		   netdev_uc_count(adapter->netdev);
+	macs->tot_macs_in_lst = cpu_to_be16(tot_macs);
+	macs->num_macs_in_msg = cpu_to_be16(count);
+	macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count);
+}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [RFC 08/10] IB/hfi-vnic: VNIC Ethernet Management Agent (VEMA) driver
From: Vishwanathapura, Niranjana @ 2016-11-18 22:42 UTC (permalink / raw)
  To: Doug Ledford
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	Dennis Dalessandro, Sadanand Warrier, Niranjana Vishwanathapura,
	Tanya K Jajodia, Sudeep Dutt
In-Reply-To: <1479508938-63799-1-git-send-email-niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

HFI VEMA driver interfaces with the Infiniband MAD stack to exchange the
management information packets with the Ethernet Manager (EM).
It interfaces with the HFI VNIC netdev driver to SET/GET the management
information. The information exchanged with the EM includes class port
details, encapsulation configuration, various counters, unicast and
multicast MAC list and the MAC table. It also supports sending traps
to the EM.

Change-Id: I7439f96858c9019455da1e924a0201eb27177b85
Reviewed-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Sadanand Warrier <sadanand.warrier-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Tanya K Jajodia <tanya.k.jajodia-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Sudeep Dutt <sudeep.dutt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile |    2 +-
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h     |    9 +
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c       |    9 +-
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c         | 1024 ++++++++++++++++++++
 .../sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c   |    2 +-
 5 files changed, 1043 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c

diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
index 375cd09..e05b72b 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/Makefile
@@ -5,4 +5,4 @@ ccflags-y += -I$(src)/../include
 obj-$(CONFIG_HFI_VNIC) += hfi_vnic.o
 
 hfi_vnic-y := hfi_vnic_netdev.o hfi_vnic_encap.o hfi_vnic_ethtool.o \
-              hfi_vnic_vema_iface.o
+              hfi_vnic_vema.o hfi_vnic_vema_iface.o
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
index 8ebed89..fbebf68 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_internal.h
@@ -268,6 +268,8 @@ struct hfi_vnic_rx_queue {
  * @mactbl_lock: mac table lock
  * @stats_lock: statistics lock
  * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
  * @q_sum_cntrs: per queue EM summary counters
  * @q_err_cntrs: per queue EM error counters
  * @q_rx_logic_errors: per queue rx logic (default) errors
@@ -301,6 +303,8 @@ struct hfi_vnic_adapter {
 	struct mutex stats_lock;
 
 	u8 flow_tbl[HFI_VNIC_FLOW_TBL_SIZE];
+	unsigned long trap_timeout;
+	u8            trap_count;
 
 	struct __hfi_vnic_summary_counters  q_sum_cntrs[HFI_VNIC_MAX_QUEUE];
 	struct __hfi_vnic_error_counters    q_err_cntrs[HFI_VNIC_MAX_QUEUE];
@@ -410,4 +414,9 @@ void hfi_vnic_set_per_veswport_info(struct hfi_vnic_adapter *adapter,
 void hfi_vnic_vema_report_event(struct hfi_vnic_adapter *adapter, u8 event);
 void hfi_vnic_set_ethtool_ops(struct net_device *ndev);
 
+int hfi_vnic_vema_init(void);
+void hfi_vnic_vema_deinit(void);
+void hfi_vnic_vema_send_trap(struct hfi_vnic_adapter *adapter,
+			     struct __hfi_veswport_trap *data, u32 lid);
+
 #endif /* _HFI_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
index 75a3fd2..4ee5bb6 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_netdev.c
@@ -855,9 +855,15 @@ static int __init hfi_vnic_init_module(void)
 	pr_info("HFI Virtual Network Driver - %s\n",
 		hfi_vnic_driver_version);
 
-	rc = hfi_vnic_driver_register(&hfi_vnic_drv);
+	rc = hfi_vnic_vema_init();
 	if (rc)
+		return rc;
+
+	rc = hfi_vnic_driver_register(&hfi_vnic_drv);
+	if (rc) {
 		pr_err("VNIC driver register failed %d\n", rc);
+		hfi_vnic_vema_deinit();
+	}
 
 	return rc;
 }
@@ -867,6 +873,7 @@ static int __init hfi_vnic_init_module(void)
 static void __exit hfi_vnic_exit_module(void)
 {
 	hfi_vnic_driver_unregister(&hfi_vnic_drv);
+	hfi_vnic_vema_deinit();
 }
 module_exit(hfi_vnic_exit_module);
 
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c
new file mode 100644
index 0000000..b947cdf
--- /dev/null
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema.c
@@ -0,0 +1,1024 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI Virtual Network Interface Controller (VNIC)
+ * Ethernet Management Agent (EMA) driver
+ */
+
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+
+#include "hfi_vnic.h"
+#include "hfi_vnic_internal.h"
+
+/*
+ * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd
+ * field in the class port info MAD.
+ */
+#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x)  (((x) >> 3) & 0x1f)
+
+/* Cap trap bursts to a reasonable limit good for normal cases */
+#define HFI_VNIC_TRAP_BURST_LIMIT 4
+
+/*
+ * VNIC trap limit timeout.
+ * Inverse of cap2_mask response time out (1.0737 secs) = 0.9
+ * secs approx IB spec 13.4.6.2.1 PortInfoSubnetTimeout and
+ * 13.4.9 Traps.
+ */
+#define HFI_VNIC_TRAP_TIMEOUT  ((4096 * (1UL << 18)) / 1000)
+
+#define HFI_VNIC_UNSUP_ATTR  \
+		cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB)
+
+#define HFI_VNIC_INVAL_ATTR  \
+		cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE)
+
+#define HFI_VNIC_CLASS_CAP_TRAP  cpu_to_be16(1 << 8)
+
+struct hfi_class_port_info {
+	u8 base_version;
+	u8 class_version;
+	__be16 cap_mask;
+	__be32 cap_mask2_resp_time;
+
+	u8 redirect_gid[16];
+	__be32 redirect_tc_fl;
+	__be32 redirect_lid;
+	__be32 redirect_sl_qp;
+	__be32 redirect_qkey;
+
+	u8 trap_gid[16];
+	__be32 trap_tc_fl;
+	__be32 trap_lid;
+	__be32 trap_hl_qp;
+	__be32 trap_qkey;
+
+	__be16 trap_pkey;
+	__be16 redirect_pkey;
+
+	u8 trap_sl_rsvd;
+	u8 reserved[3];
+} __packed;
+
+/**
+ * struct hfi_vnic_vema_port -- VNIC VEMA port details
+ * @cdev:     pointer to device
+ * @mad_agent: pointer to mad agent for port
+ * @class_port_info: Class port info information.
+ * @tid: Transaction id
+ * @port_num:  port number on HFI device
+ * @lock: adapter interface lock
+ * @vnic_mask: Bit mask for vnic presence
+ */
+struct hfi_vnic_vema_port {
+	struct hfi_vnic_ctrl_device    *cdev;
+	struct ib_mad_agent            *mad_agent;
+	struct hfi_class_port_info      class_port_info;
+	u64                             tid;
+	u8                              port_num;
+
+	/* Lock to query/update network adapter */
+	struct mutex                    lock;
+	DECLARE_BITMAP(vnic_mask, HFI_MAX_VPORTS_SUPPORTED);
+};
+
+static const char hfi_vnic_ctrl_driver_name[] = "hfi_vnic_ctrl";
+
+/**
+ * vema_get_vport_num -- Get the vnic from the mad
+ * @recvd_mad:  Received mad
+ *
+ * Return: returns value of the vnic port number
+ */
+static inline u8 vema_get_vport_num(struct hfi_vnic_vema_mad *recvd_mad)
+{
+	return be32_to_cpu(recvd_mad->mad_hdr.attr_mod) >> 16 & 0xff;
+}
+
+/**
+ * vema_mac_tbl_req_ok -- Check if mac request has correct values
+ * @mac_tbl: mac table
+ *
+ * This function checks for the validity of the offset and number of
+ * entries required.
+ *
+ * Return: true if offset and num_entries are valid
+ */
+static inline bool vema_mac_tbl_req_ok(struct hfi_veswport_mactable *mac_tbl)
+{
+	u16 offset, num_entries;
+	u16 req_entries = ((HFI_VNIC_EMA_DATA - sizeof(*mac_tbl)) /
+			   sizeof(mac_tbl->tbl_entries[0]));
+
+	offset = be16_to_cpu(mac_tbl->offset);
+	num_entries = be16_to_cpu(mac_tbl->num_entries);
+
+	return ((num_entries <= req_entries) &&
+		(offset + num_entries <= HFI_VNIC_MAC_TBL_MAX_ENTRIES));
+}
+
+/**
+ * vema_parms_from_recv_mad -- Get req params from recvd mad
+ * @recvd_mad: received mad
+ * @port: ptr to port struct on which MAD was recvd
+ * @adapter: ptr to ptr to adapter to be filled in
+ *
+ * Return: 0 if success, else non-zero.
+ */
+static int vema_parms_from_recv_mad(struct hfi_vnic_vema_mad *recvd_mad,
+				    struct hfi_vnic_vema_port *port,
+				    struct hfi_vnic_adapter **adapter)
+{
+	struct hfi_vnic_device *vdev;
+	u8 vport_num;
+
+	vport_num = vema_get_vport_num(recvd_mad);
+	vdev = hfi_vnic_get_dev(port->cdev, port->port_num, vport_num);
+	if (!vdev) {
+		dev_err(&port->cdev->dev,
+			"%s:vnic_num %d vdev access err\n", __func__,
+			vport_num);
+		return -EINVAL;
+	}
+	*adapter = netdev_priv(vdev->netdev);
+
+	return 0;
+}
+
+/*
+ * Return the power on default values in the port info structure
+ * in big endian format as required by MAD.
+ */
+static inline void vema_get_pod_values(struct hfi_veswport_info *port_info)
+{
+	memset(port_info, 0, sizeof(*port_info));
+	port_info->vport.max_mac_tbl_ent =
+		cpu_to_be16(HFI_VNIC_MAC_TBL_MAX_ENTRIES);
+	port_info->vport.max_smac_ent =
+		cpu_to_be16(HFI_VNIC_MAX_SMAC_LIMIT);
+	port_info->vport.oper_state = HFI_VNIC_STATE_DROP_ALL;
+	port_info->vport.config_state = HFI_VNIC_STATE_DROP_ALL;
+}
+
+/**
+ * vema_create_vnic -- Create a new vnic device
+ * @port: ptr to hfi_vnic_vema_port struct
+ * @vport_num: Vnic number (to be created)
+ * @adapter: Prt to ptr to adapter associated with vnic
+ *
+ * Create the new vnic device.
+ * Return a pointer to the adapter structure within the new vnic in
+ * the third variable.
+ */
+static int vema_create_vnic(struct hfi_vnic_vema_port *port, u8 vport_num,
+			    struct hfi_vnic_adapter **adapter)
+{
+	struct hfi_vnic_device *vdev;
+	int ret;
+
+	ret = port->cdev->ctrl_ops->add_vport(port->cdev, port->port_num,
+					      vport_num);
+	if (ret) {
+		dev_err(&port->cdev->dev,
+			"%s:vnic %d not created\n", __func__, vport_num);
+	} else {
+		vdev = hfi_vnic_get_dev(port->cdev, port->port_num, vport_num);
+		if (!vdev) {
+			dev_err(&port->cdev->dev,
+				"%s:vnic_num %d vdev access err\n", __func__,
+				vport_num);
+			ret = -ENODEV;
+		} else {
+			*adapter = netdev_priv(vdev->netdev);
+			set_bit(vport_num, port->vnic_mask);
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * vema_get_class_port_info -- Get class info for port
+ * @port:  Port on whic MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to respose mad
+ *
+ * This function copies the latest class port info value set for the
+ * port and stores it for generating traps
+ */
+static void vema_get_class_port_info(struct hfi_vnic_vema_port *port,
+				     struct hfi_vnic_vema_mad *recvd_mad,
+				     struct hfi_vnic_vema_mad *rsp_mad)
+{
+	struct hfi_class_port_info *port_info;
+
+	port_info = (struct hfi_class_port_info *)rsp_mad->data;
+	memcpy(port_info, &port->class_port_info, sizeof(*port_info));
+	port_info->base_version = OPA_MGMT_BASE_VERSION,
+	port_info->class_version = HFI_EMA_CLASS_VERSION;
+
+	/* Agent generates traps */
+	port_info->cap_mask = HFI_VNIC_CLASS_CAP_TRAP;
+
+	/*
+	 * Since a get routine is always sent by the EM first we
+	 * set the expected response time to
+	 * 4.096 usec * 2^18 == 1.0737 sec here.
+	 */
+	port_info->cap_mask2_resp_time = cpu_to_be32(18);
+}
+
+/**
+ * vema_set_class_port_info -- Get class info for port
+ * @port:  Port on whic MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to respose mad
+ *
+ * This function updates the port class info for the specific vnic
+ * and sets up the response mad data
+ */
+static void vema_set_class_port_info(struct hfi_vnic_vema_port *port,
+				     struct hfi_vnic_vema_mad *recvd_mad,
+				     struct hfi_vnic_vema_mad *rsp_mad)
+{
+	memcpy(&port->class_port_info, recvd_mad->data,
+	       sizeof(port->class_port_info));
+
+	vema_get_class_port_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_veswport_info -- Get veswport info
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to respose mad
+ */
+static void vema_get_veswport_info(struct hfi_vnic_vema_port *port,
+				   struct hfi_vnic_vema_mad *recvd_mad,
+				   struct hfi_vnic_vema_mad *rsp_mad)
+{
+	struct hfi_veswport_info *port_info =
+				  (struct hfi_veswport_info *)rsp_mad->data;
+	struct hfi_vnic_adapter *adapter;
+	u8 vport_num;
+
+	vport_num = vema_get_vport_num(recvd_mad);
+
+	if (test_bit(vport_num, port->vnic_mask)) {
+		if (vema_parms_from_recv_mad(recvd_mad, port, &adapter)) {
+			dev_err(&port->cdev->dev,
+				"%s:vnic adapter not found\n", __func__);
+			goto err_exit;
+		} else {
+			memset(port_info, 0, sizeof(*port_info));
+			hfi_vnic_get_vesw_info(adapter, &port_info->vesw);
+			hfi_vnic_get_per_veswport_info(adapter,
+						       &port_info->vport);
+		}
+	} else {
+		vema_get_pod_values(port_info);
+	}
+	return;
+
+err_exit:
+	rsp_mad->mad_hdr.status = HFI_VNIC_INVAL_ATTR;
+}
+
+/**
+ * vema_set_veswport_info -- Set veswport info
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to respose mad
+ *
+ * This function gets the port class infor for vnic
+ */
+static void vema_set_veswport_info(struct hfi_vnic_vema_port *port,
+				   struct hfi_vnic_vema_mad *recvd_mad,
+				   struct hfi_vnic_vema_mad *rsp_mad)
+{
+	struct hfi_veswport_info *port_info;
+	struct hfi_vnic_adapter *adapter;
+	u8 vport_num;
+
+	vport_num = vema_get_vport_num(recvd_mad);
+
+	if (test_bit(vport_num, port->vnic_mask)) {
+		if (vema_parms_from_recv_mad(recvd_mad, port, &adapter))
+			goto err_exit;
+	} else if (vema_create_vnic(port, vport_num, &adapter)) {
+		dev_err(&port->cdev->dev,
+			"%s:vnic %d not created\n", __func__, vport_num);
+		goto err_exit;
+	}
+
+	port_info = (struct hfi_veswport_info *)recvd_mad->data;
+	hfi_vnic_set_vesw_info(adapter, &port_info->vesw);
+	hfi_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+	/* Process the new config settings */
+	hfi_vnic_process_vema_config(adapter);
+
+	vema_get_veswport_info(port, recvd_mad, rsp_mad);
+	return;
+
+err_exit:
+	rsp_mad->mad_hdr.status = HFI_VNIC_INVAL_ATTR;
+}
+
+/**
+ * vema_get_mac_entries -- Get MAC entries in VNIC MAC table
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to respose mad
+ *
+ * This function gets the MAC entries that are programmed into
+ * the VNIC MAC forwarding table. It checks for the validity of
+ * the index into the MAC table and the number of entries that
+ * are to be retrieved.
+ */
+static void vema_get_mac_entries(struct hfi_vnic_vema_port *port,
+				 struct hfi_vnic_vema_mad *recvd_mad,
+				 struct hfi_vnic_vema_mad *rsp_mad)
+{
+	struct hfi_veswport_mactable *mac_tbl_in, *mac_tbl_out;
+	struct hfi_vnic_adapter *adapter;
+
+	if (vema_parms_from_recv_mad(recvd_mad, port, &adapter)) {
+		rsp_mad->mad_hdr.status = HFI_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	mac_tbl_in = (struct hfi_veswport_mactable *)recvd_mad->data;
+	mac_tbl_out = (struct hfi_veswport_mactable *)rsp_mad->data;
+
+	if (vema_mac_tbl_req_ok(mac_tbl_in)) {
+		mac_tbl_out->offset = mac_tbl_in->offset;
+		mac_tbl_out->num_entries = mac_tbl_in->num_entries;
+		hfi_vnic_query_mac_tbl(adapter, mac_tbl_out);
+	} else {
+		rsp_mad->mad_hdr.status = HFI_VNIC_INVAL_ATTR;
+	}
+}
+
+/**
+ * vema_set_mac_entries -- Set MAC entries in VNIC MAC table
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to respose mad
+ *
+ * This function sets the MAC entries in the VNIC forwarding table
+ * It checks for the validity of the index and the number of forwarding
+ * table entries to be programmed.
+ */
+static void vema_set_mac_entries(struct hfi_vnic_vema_port *port,
+				 struct hfi_vnic_vema_mad *recvd_mad,
+				 struct hfi_vnic_vema_mad *rsp_mad)
+{
+	struct hfi_veswport_mactable *mac_tbl;
+	struct hfi_vnic_adapter *adapter;
+
+	if (vema_parms_from_recv_mad(recvd_mad, port, &adapter)) {
+		rsp_mad->mad_hdr.status = HFI_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	mac_tbl = (struct hfi_veswport_mactable *)recvd_mad->data;
+	if (vema_mac_tbl_req_ok(mac_tbl)) {
+		if (hfi_vnic_update_mac_tbl(adapter, mac_tbl))
+			rsp_mad->mad_hdr.status = HFI_VNIC_UNSUP_ATTR;
+	} else {
+		rsp_mad->mad_hdr.status = HFI_VNIC_UNSUP_ATTR;
+	}
+	vema_get_mac_entries(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_set_delete_vesw -- Reset VESW info to POD values
+ * @port:      source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad:   pointer to respose mad
+ *
+ * This function clears all the fields of veswport info for the requested vesw
+ * and sets them back to the power-on default values. It does not delete the
+ * vesw.
+ */
+static void vema_set_delete_vesw(struct hfi_vnic_vema_port *port,
+				 struct hfi_vnic_vema_mad *recvd_mad,
+				 struct hfi_vnic_vema_mad *rsp_mad)
+{
+	struct hfi_veswport_info *port_info =
+				  (struct hfi_veswport_info *)rsp_mad->data;
+	struct hfi_vnic_adapter *adapter;
+
+	if (vema_parms_from_recv_mad(recvd_mad, port, &adapter)) {
+		rsp_mad->mad_hdr.status = HFI_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	vema_get_pod_values(port_info);
+	hfi_vnic_set_vesw_info(adapter, &port_info->vesw);
+	hfi_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+	/* Process the new config settings */
+	hfi_vnic_process_vema_config(adapter);
+
+	hfi_vnic_release_mac_tbl(adapter);
+
+	vema_get_veswport_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_mac_list -- Get the unicast/multicast macs.
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad:   Response mad to be built
+ * @attr_id:   Attribute ID indicating multicast or unicast mac list
+ */
+static void vema_get_mac_list(struct hfi_vnic_vema_port *port,
+			      struct hfi_vnic_vema_mad *recvd_mad,
+			      struct hfi_vnic_vema_mad *rsp_mad,
+			      u16 attr_id)
+{
+	struct hfi_veswport_iface_macs *macs_in, *macs_out;
+	int max_entries = (HFI_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN;
+	struct hfi_vnic_adapter *adapter;
+
+	if (vema_parms_from_recv_mad(recvd_mad, port, &adapter)) {
+		rsp_mad->mad_hdr.status = HFI_VNIC_INVAL_ATTR;
+		return;
+	}
+
+	macs_in = (struct hfi_veswport_iface_macs *)recvd_mad->data;
+	macs_out = (struct hfi_veswport_iface_macs *)rsp_mad->data;
+
+	macs_out->start_idx = macs_in->start_idx;
+	if (macs_in->num_macs_in_msg)
+		macs_out->num_macs_in_msg = macs_in->num_macs_in_msg;
+	else
+		macs_out->num_macs_in_msg = cpu_to_be16(max_entries);
+
+	if (attr_id == HFI_EM_ATTR_IFACE_MCAST_MACS)
+		hfi_vnic_query_mcast_macs(adapter, macs_out);
+	else
+		hfi_vnic_query_ucast_macs(adapter, macs_out);
+}
+
+/**
+ * vema_get_summary_counters -- Gets summary counters.
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad:   Response mad to be built
+ */
+static void vema_get_summary_counters(struct hfi_vnic_vema_port *port,
+				      struct hfi_vnic_vema_mad *recvd_mad,
+				      struct hfi_vnic_vema_mad *rsp_mad)
+{
+	struct hfi_veswport_summary_counters *cntrs;
+	struct hfi_vnic_adapter *adapter;
+
+	if (vema_parms_from_recv_mad(recvd_mad, port, &adapter)) {
+		rsp_mad->mad_hdr.status = HFI_VNIC_INVAL_ATTR;
+		return;
+	}
+	cntrs = (struct hfi_veswport_summary_counters *)rsp_mad->data;
+	hfi_vnic_get_summary_counters(adapter, cntrs);
+}
+
+/**
+ * vema_get_error_counters -- Gets summary counters.
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad:   Response mad to be built
+ */
+static void vema_get_error_counters(struct hfi_vnic_vema_port *port,
+				    struct hfi_vnic_vema_mad *recvd_mad,
+				    struct hfi_vnic_vema_mad *rsp_mad)
+{
+	struct hfi_veswport_error_counters *cntrs;
+	struct hfi_vnic_adapter *adapter;
+
+	if (vema_parms_from_recv_mad(recvd_mad, port, &adapter)) {
+		rsp_mad->mad_hdr.status = HFI_VNIC_INVAL_ATTR;
+		return;
+	}
+	cntrs = (struct hfi_veswport_error_counters *)rsp_mad->data;
+	hfi_vnic_get_error_counters(adapter, cntrs);
+}
+
+/**
+ * vema_get -- Process received get MAD
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad
+ * @rsp_mad:   Response mad to be built
+ */
+static void vema_get(struct hfi_vnic_vema_port *port,
+		     struct hfi_vnic_vema_mad *recvd_mad,
+		     struct hfi_vnic_vema_mad *rsp_mad)
+{
+	u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+	switch (attr_id) {
+	case HFI_EM_ATTR_CLASS_PORT_INFO:
+		vema_get_class_port_info(port, recvd_mad, rsp_mad);
+		break;
+	case HFI_EM_ATTR_VESWPORT_INFO:
+		vema_get_veswport_info(port, recvd_mad, rsp_mad);
+		break;
+	case HFI_EM_ATTR_VESWPORT_MAC_ENTRIES:
+		vema_get_mac_entries(port, recvd_mad, rsp_mad);
+		break;
+	case HFI_EM_ATTR_IFACE_UCAST_MACS:
+		/* fall through */
+	case HFI_EM_ATTR_IFACE_MCAST_MACS:
+		vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id);
+		break;
+	case HFI_EM_ATTR_VESWPORT_SUMMARY_COUNTERS:
+		vema_get_summary_counters(port, recvd_mad, rsp_mad);
+		break;
+	case HFI_EM_ATTR_VESWPORT_ERROR_COUNTERS:
+		vema_get_error_counters(port, recvd_mad, rsp_mad);
+		break;
+	default:
+		rsp_mad->mad_hdr.status = HFI_VNIC_UNSUP_ATTR;
+		break;
+	}
+}
+
+/**
+ * vema_set -- Process received set MAD
+ * @port:      source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad:   Response mad to be built
+ */
+static void vema_set(struct hfi_vnic_vema_port *port,
+		     struct hfi_vnic_vema_mad *recvd_mad,
+		     struct hfi_vnic_vema_mad *rsp_mad)
+{
+	u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+	switch (attr_id) {
+	case HFI_EM_ATTR_CLASS_PORT_INFO:
+		vema_set_class_port_info(port, recvd_mad, rsp_mad);
+		break;
+	case HFI_EM_ATTR_VESWPORT_INFO:
+		vema_set_veswport_info(port, recvd_mad, rsp_mad);
+		break;
+	case HFI_EM_ATTR_VESWPORT_MAC_ENTRIES:
+		vema_set_mac_entries(port, recvd_mad, rsp_mad);
+		break;
+	case HFI_EM_ATTR_DELETE_VESW:
+		vema_set_delete_vesw(port, recvd_mad, rsp_mad);
+		break;
+	default:
+		rsp_mad->mad_hdr.status = HFI_VNIC_UNSUP_ATTR;
+		break;
+	}
+}
+
+/**
+ * vema_send -- Send handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @mad_wc:    pointer to mad send work completion information
+ *
+ * Free all the data structures associated with the sent MAD
+ */
+static void vema_send(struct ib_mad_agent *mad_agent,
+		      struct ib_mad_send_wc *mad_wc)
+{
+	ib_destroy_ah(mad_wc->send_buf->ah);
+	ib_free_send_mad(mad_wc->send_buf);
+}
+
+/**
+ * vema_recv -- Recv handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @send_buf: Send buffer if found, else NULL
+ * @mad_wc:    pointer to mad send work completion information
+ *
+ * Handle only set and get methods and respond to other methods
+ * as unsupported. Allocate response buffer and address handle
+ * for the response MAD.
+ */
+static void vema_recv(struct ib_mad_agent *mad_agent,
+		      struct ib_mad_send_buf *send_buf,
+		      struct ib_mad_recv_wc *mad_wc)
+{
+	struct hfi_vnic_vema_port *port;
+	struct ib_ah              *ah;
+	struct ib_mad_send_buf    *rsp;
+	struct hfi_vnic_vema_mad  *vema_mad;
+
+	if (!mad_wc || !mad_wc->recv_buf.mad)
+		return;
+
+	port = mad_agent->context;
+	ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
+				  mad_wc->recv_buf.grh, mad_agent->port_num);
+	if (IS_ERR(ah))
+		goto free_recv_mad;
+
+	rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
+				 mad_wc->wc->pkey_index, 0,
+				 IB_MGMT_VENDOR_HDR, HFI_VNIC_EMA_DATA,
+				 GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+	if (IS_ERR(rsp))
+		goto err_rsp;
+
+	rsp->ah = ah;
+	vema_mad = rsp->mad;
+	memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR);
+	vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+	vema_mad->mad_hdr.status = 0;
+
+	/* Lock ensures network adapter is not removed */
+	mutex_lock(&port->lock);
+
+	switch (mad_wc->recv_buf.mad->mad_hdr.method) {
+	case IB_MGMT_METHOD_GET:
+		vema_get(port, (struct hfi_vnic_vema_mad *)mad_wc->recv_buf.mad,
+			 vema_mad);
+		break;
+	case IB_MGMT_METHOD_SET:
+		vema_set(port, (struct hfi_vnic_vema_mad *)mad_wc->recv_buf.mad,
+			 vema_mad);
+		break;
+	default:
+		vema_mad->mad_hdr.status = HFI_VNIC_UNSUP_ATTR;
+		break;
+	}
+	mutex_unlock(&port->lock);
+
+	if (!ib_post_send_mad(rsp, NULL)) {
+		/*
+		 * with post send successful ah and send mad
+		 * will be destroyed in send handler
+		 */
+		goto free_recv_mad;
+	}
+
+	ib_free_send_mad(rsp);
+
+err_rsp:
+	ib_destroy_ah(ah);
+free_recv_mad:
+	ib_free_recv_mad(mad_wc);
+}
+
+/**
+ * vema_get_port -- Gets the hfi_vnic_vema_port
+ * @cdev: pointer to control dev
+ * @port_num: Port number
+ *
+ * This function loops through the ports and returns
+ * the hfi_vnic_vema port structure that is associated
+ * with the HFI port number
+ *
+ * Return: ptr to requested hfi_vnic_vema_port strucure
+ *         if success, NULL if not
+ */
+static struct hfi_vnic_vema_port *
+vema_get_port(struct hfi_vnic_ctrl_device *cdev, u16 port_num)
+{
+	struct hfi_vnic_vema_port *port_base;
+
+	if (port_num > cdev->num_ports)
+		return NULL;
+
+	port_base = (struct hfi_vnic_vema_port *)dev_get_drvdata(&cdev->dev);
+	return port_base + (port_num - 1);
+}
+
+/**
+ * vema_unregister -- Unregisters agent
+ * @cdev: pointer to control device
+ *
+ * This deletes the registration by VEMA for MADs
+ */
+static void vema_unregister(struct hfi_vnic_ctrl_device *cdev)
+{
+	struct hfi_vnic_vema_port *port, *port_base;
+	int i, j;
+
+	port_base = (struct hfi_vnic_vema_port *)dev_get_drvdata(&cdev->dev);
+	for (i = 0, port = port_base; i < cdev->num_ports; i++, port++) {
+		/* Lock ensures no MAD is being processed */
+		mutex_lock(&port->lock);
+		for (j = 0; j <  HFI_MAX_VPORTS_SUPPORTED; j++) {
+			if (test_bit(j, port->vnic_mask)) {
+				port->cdev->ctrl_ops->rem_vport(cdev,
+								port->port_num,
+								j);
+				clear_bit(j, port->vnic_mask);
+			}
+		}
+		mutex_unlock(&port->lock);
+		if (port->mad_agent)
+			ib_unregister_mad_agent(port->mad_agent);
+
+		mutex_destroy(&port->lock);
+	}
+
+	kfree(port_base);
+}
+
+/**
+ * vema_register -- Registers agent
+ * @cdev: pointer to control device
+ *
+ * This function registers the handlers for the VEMA MADs
+ *
+ * Return: returns 0 on success. non zero otherwise
+ */
+static int vema_register(struct hfi_vnic_ctrl_device *cdev)
+{
+	struct hfi_vnic_vema_port *port, *port_base;
+
+	struct ib_mad_reg_req reg_req = {
+		.mgmt_class = HFI_MGMT_CLASS_INTEL_EMA,
+		.mgmt_class_version = OPA_MGMT_BASE_VERSION,
+		.oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 }
+	};
+	int i;
+
+	port_base = kcalloc(cdev->num_ports, sizeof(*port), GFP_KERNEL);
+	if (!port_base)
+		return -ENOMEM;
+
+	dev_set_drvdata(&cdev->dev, port_base);
+
+	set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
+	set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
+
+	/* register mad agent for each port on dev */
+	for (i = 0, port = port_base; i < cdev->num_ports; i++, port++) {
+		port->cdev = cdev;
+		port->port_num = i + 1;
+		mutex_init(&port->lock);
+		port->mad_agent = ib_register_mad_agent(cdev->ibdev, i + 1,
+							IB_QPT_GSI,
+							&reg_req,
+							IB_MGMT_RMPP_VERSION,
+							vema_send,
+							vema_recv,
+							port,
+							0);
+		if (IS_ERR(port->mad_agent)) {
+			int ret = PTR_ERR(port->mad_agent);
+
+			port->mad_agent = NULL;
+			vema_unregister(cdev);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * hfi_vnic_vema_send_trap -- This function sends a trap to the EM
+ * @cdev: pointer to vnic control device
+ * @data: pointer to trap data filled by calling function
+ * @lid:  issuers lid (encap_slid from vesw_port_info)
+ *
+ * This function is called from the VNIC driver to send a trap if there
+ * is somethng the EM should be notified about. These events currently
+ * are
+ * 1) UNICAST INTERFACE MACADDRESS changes
+ * 2) MULTICAST INTERFACE MACADDRESS changes
+ * 3) ETHERNET LINK STATUS changes
+ * While allocating the send mad the remote site qpn used is 1
+ * as this is the well known QP.
+ *
+ */
+void hfi_vnic_vema_send_trap(struct hfi_vnic_adapter *adapter,
+			     struct __hfi_veswport_trap *data, u32 lid)
+{
+	struct hfi_vnic_ctrl_device *cdev = adapter->vdev->cdev;
+	struct ib_mad_send_buf *send_buf;
+	struct hfi_vnic_vema_port *port;
+	struct ib_device *ibp;
+	struct hfi_vnic_vema_mad_trap *trap_mad;
+	struct hfi_class_port_info *class;
+	struct ib_ah_attr ah_attr;
+	struct ib_ah *ah;
+	struct hfi_veswport_trap *trap;
+	u32 trap_lid;
+	u16 pkey_idx;
+
+	if (!cdev)
+		goto err_exit;
+	ibp = cdev->ibdev;
+	port = vema_get_port(cdev, data->hfiportnum);
+	if (!port || !port->mad_agent)
+		goto err_exit;
+
+	if (time_before(jiffies, adapter->trap_timeout)) {
+		if (adapter->trap_count == HFI_VNIC_TRAP_BURST_LIMIT) {
+			v_warn("Trap rate exceeded\n");
+			goto err_exit;
+		} else {
+			adapter->trap_count++;
+		}
+	} else {
+		adapter->trap_count = 0;
+	}
+
+	class = &port->class_port_info;
+	/* Set up address handle */
+	memset(&ah_attr, 0, sizeof(ah_attr));
+	ah_attr.sl = GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd);
+	ah_attr.port_num = port->port_num;
+	trap_lid = be32_to_cpu(class->trap_lid);
+	/*
+	 * check for trap lid validity, must not be zero
+	 * The trap sink could change after we fashion the MAD but since traps
+	 * are not guaranteed we won't use a lock as anyway the change will take
+	 * place even with locking.
+	 */
+	if (!trap_lid) {
+		dev_err(&cdev->dev, "%s: Invalid dlid\n", __func__);
+		goto err_exit;
+	}
+
+	ah_attr.dlid = trap_lid;
+	ah = ib_create_ah(port->mad_agent->qp->pd, &ah_attr);
+	if (IS_ERR(ah)) {
+		dev_err(&cdev->dev,
+			"%s:Couldn't create new AH = %p\n", __func__, ah);
+		dev_err(&cdev->dev,
+			"%s:dlid = %d, sl = %d, port = %d\n", __func__,
+			ah_attr.dlid, ah_attr.sl, ah_attr.port_num);
+		goto err_exit;
+	}
+
+	if (ib_find_pkey(ibp, data->hfiportnum, IB_DEFAULT_PKEY_FULL,
+			 &pkey_idx) < 0) {
+		dev_err(&cdev->dev,
+			"%s:full key not found, defaulting to partial\n",
+			__func__);
+		if (ib_find_pkey(ibp, data->hfiportnum, IB_DEFAULT_PKEY_PARTIAL,
+				 &pkey_idx) < 0)
+			pkey_idx = 1;
+	}
+
+	send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0,
+				      IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA,
+				      GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+	if (IS_ERR(send_buf)) {
+		dev_err(&cdev->dev, "%s:Couldn't allocate send buf\n",
+			__func__);
+		goto err_sndbuf;
+	}
+
+	send_buf->ah = ah;
+
+	/* Set up common MAD hdr */
+	trap_mad = send_buf->mad;
+	trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+	trap_mad->mad_hdr.mgmt_class = HFI_MGMT_CLASS_INTEL_EMA;
+	trap_mad->mad_hdr.class_version = HFI_EMA_CLASS_VERSION;
+	trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP;
+	port->tid++;
+	trap_mad->mad_hdr.tid = cpu_to_be64(port->tid);
+	trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE;
+
+	/* Set up vendor OUI */
+	trap_mad->oui[0] = INTEL_OUI_1;
+	trap_mad->oui[1] = INTEL_OUI_2;
+	trap_mad->oui[2] = INTEL_OUI_3;
+
+	/* Setup notice attribute portion */
+	trap_mad->notice.gen_type = HFI_INTEL_EMA_NOTICE_TYPE_INFO << 1;
+	trap_mad->notice.oui_1 = INTEL_OUI_1;
+	trap_mad->notice.oui_2 = INTEL_OUI_2;
+	trap_mad->notice.oui_3 = INTEL_OUI_3;
+	trap_mad->notice.issuer_lid = cpu_to_be32(lid);
+
+	/* copy the actual trap data */
+	trap = (struct hfi_veswport_trap *)trap_mad->notice.raw_data;
+	trap->fabric_id = cpu_to_be16(data->fabric_id);
+	trap->veswid = cpu_to_be16(data->veswid);
+	trap->veswportnum = cpu_to_be32(data->veswportnum);
+	trap->hfiportnum = cpu_to_be16(data->hfiportnum);
+	trap->veswportindex = data->veswportindex;
+	trap->opcode = data->opcode;
+
+	/* If successful send set up rate limit timeout else bail */
+	if (ib_post_send_mad(send_buf, NULL)) {
+		ib_free_send_mad(send_buf);
+	} else {
+		if (adapter->trap_count)
+			return;
+		adapter->trap_timeout = jiffies +
+					usecs_to_jiffies(HFI_VNIC_TRAP_TIMEOUT);
+		return;
+	}
+
+err_sndbuf:
+	ib_destroy_ah(ah);
+err_exit:
+	v_err("%s: Aborting trap\n", __func__);
+}
+
+/* hfi_vnic_ctrl_drv_probe - control device initialization routine */
+static int hfi_vnic_ctrl_drv_probe(struct device *dev)
+{
+	struct hfi_vnic_ctrl_device *cdev = container_of(dev,
+					 struct hfi_vnic_ctrl_device, dev);
+	int rc;
+
+	/* Initialize hfi vnic management agent (vema) */
+	rc = vema_register(cdev);
+	if (!rc)
+		dev_info(dev, "initialized\n");
+
+	return rc;
+}
+
+/* hfi_vnic_ctrl_drv_remove - control device removal routine */
+static int hfi_vnic_ctrl_drv_remove(struct device *dev)
+{
+	struct hfi_vnic_ctrl_device *cdev = container_of(dev,
+					 struct hfi_vnic_ctrl_device, dev);
+
+	vema_unregister(cdev);
+
+	dev_info(dev, "removed\n");
+	return 0;
+}
+
+/* HFI Virtual Network Control Driver */
+static struct hfi_vnic_ctrl_driver hfi_vnic_ctrl_drv = {
+	.drvwrap = {
+		.type = HFI_VNIC_CTRL_DRV,
+		.driver = {
+			.name   = hfi_vnic_ctrl_driver_name,
+			.probe  = hfi_vnic_ctrl_drv_probe,
+			.remove = hfi_vnic_ctrl_drv_remove
+		}
+	}
+};
+
+/* hfi_vnic_vema_init - initialize vema */
+int __init hfi_vnic_vema_init(void)
+{
+	int rc;
+
+	rc = hfi_vnic_ctrl_driver_register(&hfi_vnic_ctrl_drv);
+	if (rc)
+		pr_err("VNIC ctrl driver register failed %d\n", rc);
+
+	return rc;
+}
+
+/* hfi_vnic_vema_deinit - deinitialize vema */
+void hfi_vnic_vema_deinit(void)
+{
+	hfi_vnic_ctrl_driver_unregister(&hfi_vnic_ctrl_drv);
+}
diff --git a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c
index 4a87826..98ddaaf 100644
--- a/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c
+++ b/drivers/infiniband/sw/intel/vnic/hfi_vnic/hfi_vnic_vema_iface.c
@@ -72,7 +72,7 @@ void hfi_vnic_vema_report_event(struct hfi_vnic_adapter *adapter, u8 event)
 	trap_data.veswportindex = vdev->vport_num;
 	trap_data.opcode = event;
 
-	/* Need to send trap here */
+	hfi_vnic_vema_send_trap(adapter, &trap_data, info->vport.encap_slid);
 }
 
 /**
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox