Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH V2 02/14] net:  Switch to using the new packet offload infrustructure
From: Vlad Yasevich @ 2012-11-15 18:49 UTC (permalink / raw)
  To: netdev; +Cc: davem, eric.dumazet
In-Reply-To: <1353005363-6974-1-git-send-email-vyasevic@redhat.com>

Convert to using the new GSO/GRO registration mechanism and new
packet offload structure.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 include/linux/netdevice.h |    6 ------
 net/core/dev.c            |   19 +++++++++----------
 net/ipv4/af_inet.c        |    5 +++++
 net/ipv6/af_inet6.c       |    6 ++++++
 4 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a896a47..12c217d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1499,12 +1499,6 @@ struct packet_type {
 					 struct net_device *,
 					 struct packet_type *,
 					 struct net_device *);
-	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
-						netdev_features_t features);
-	int			(*gso_send_check)(struct sk_buff *skb);
-	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
-					       struct sk_buff *skb);
-	int			(*gro_complete)(struct sk_buff *skb);
 	bool			(*id_match)(struct packet_type *ptype,
 					    struct sock *sk);
 	void			*af_packet_priv;
diff --git a/net/core/dev.c b/net/core/dev.c
index 10967e5..13f9b85 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2072,7 +2072,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
 	int vlan_depth = ETH_HLEN;
 	int err;
@@ -2101,9 +2101,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype,
-			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+	list_for_each_entry_rcu(ptype, &offload_base, list) {
+		if (ptype->type == type && ptype->gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 				err = ptype->gso_send_check(skb);
 				segs = ERR_PTR(err);
@@ -3522,9 +3521,9 @@ static void flush_backlog(void *arg)
 
 static int napi_gro_complete(struct sk_buff *skb)
 {
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int err = -ENOENT;
 
 	if (NAPI_GRO_CB(skb)->count == 1) {
@@ -3534,7 +3533,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+		if (ptype->type != type || !ptype->gro_complete)
 			continue;
 
 		err = ptype->gro_complete(skb);
@@ -3584,9 +3583,9 @@ EXPORT_SYMBOL(napi_gro_flush);
 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
-	struct packet_type *ptype;
+	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	struct list_head *head = &offload_base;
 	int same_flow;
 	int mac_len;
 	enum gro_result ret;
@@ -3599,7 +3598,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+		if (ptype->type != type || !ptype->gro_receive)
 			continue;
 
 		skb_set_network_header(skb, skb_gro_offset(skb));
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 766c596..4c99c5f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1662,6 +1662,10 @@ static int ipv4_proc_init(void);
 static struct packet_type ip_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IP),
 	.func = ip_rcv,
+};
+
+static struct packet_offload ip_packet_offload __read_mostly = {
+	.type = cpu_to_be16(ETH_P_IP),
 	.gso_send_check = inet_gso_send_check,
 	.gso_segment = inet_gso_segment,
 	.gro_receive = inet_gro_receive,
@@ -1781,6 +1785,7 @@ static int __init inet_init(void)
 
 	ipfrag_init();
 
+	dev_add_offload(&ip_packet_offload);
 	dev_add_pack(&ip_packet_type);
 
 	rc = 0;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a974247..6e24517 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -938,6 +938,10 @@ out_unlock:
 static struct packet_type ipv6_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IPV6),
 	.func = ipv6_rcv,
+};
+
+static struct packet_offload ipv6_packet_offload __read_mostly = {
+	.type = cpu_to_be16(ETH_P_IPV6),
 	.gso_send_check = ipv6_gso_send_check,
 	.gso_segment = ipv6_gso_segment,
 	.gro_receive = ipv6_gro_receive,
@@ -946,6 +950,7 @@ static struct packet_type ipv6_packet_type __read_mostly = {
 
 static int __init ipv6_packet_init(void)
 {
+	dev_add_offload(&ipv6_packet_offload);
 	dev_add_pack(&ipv6_packet_type);
 	return 0;
 }
@@ -953,6 +958,7 @@ static int __init ipv6_packet_init(void)
 static void ipv6_packet_cleanup(void)
 {
 	dev_remove_pack(&ipv6_packet_type);
+	dev_remove_offload(&ipv6_packet_offload);
 }
 
 static int __net_init ipv6_init_mibs(struct net *net)
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH V2 01/14] net:  Add generic packet offload infrastructure.
From: Vlad Yasevich @ 2012-11-15 18:49 UTC (permalink / raw)
  To: netdev; +Cc: davem, eric.dumazet
In-Reply-To: <1353005363-6974-1-git-send-email-vyasevic@redhat.com>

Create a new data structure to contain the GRO/GSO callbacks and add
a new registration mechanism.

Singed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 include/linux/netdevice.h |   14 ++++++++
 net/core/dev.c            |   80 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 0 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8eda02..a896a47 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1511,6 +1511,17 @@ struct packet_type {
 	struct list_head	list;
 };
 
+struct packet_offload {
+	__be16			type;	/* This is really htons(ether_type). */
+	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
+						netdev_features_t features);
+	int			(*gso_send_check)(struct sk_buff *skb);
+	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
+					       struct sk_buff *skb);
+	int			(*gro_complete)(struct sk_buff *skb);
+	struct list_head	list;
+};
+
 #include <linux/notifier.h>
 
 /* netdevice notifier chain. Please remember to update the rtnetlink
@@ -1605,6 +1616,9 @@ extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short
 extern void		dev_add_pack(struct packet_type *pt);
 extern void		dev_remove_pack(struct packet_type *pt);
 extern void		__dev_remove_pack(struct packet_type *pt);
+extern void		dev_add_offload(struct packet_offload *po);
+extern void		dev_remove_offload(struct packet_offload *po);
+extern void		__dev_remove_offload(struct packet_offload *po);
 
 extern struct net_device	*dev_get_by_flags_rcu(struct net *net, unsigned short flags,
 						      unsigned short mask);
diff --git a/net/core/dev.c b/net/core/dev.c
index 09cb3f6..10967e5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,8 +176,10 @@
 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
 
 static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 static struct list_head ptype_all __read_mostly;	/* Taps */
+static struct list_head offload_base __read_mostly;
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -470,6 +472,82 @@ void dev_remove_pack(struct packet_type *pt)
 }
 EXPORT_SYMBOL(dev_remove_pack);
 
+
+/**
+ *	dev_add_offload - register offload handlers
+ *	@po: protocol offload declaration
+ *
+ *	Add protocol offload handlers to the networking stack. The passed
+ *	&proto_offload is linked into kernel lists and may not be freed until
+ *	it has been removed from the kernel lists.
+ *
+ *	This call does not sleep therefore it can not
+ *	guarantee all CPU's that are in middle of receiving packets
+ *	will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+
+	spin_lock(&offload_lock);
+	list_add_rcu(&po->list, head);
+	spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ *	__dev_remove_offload	 - remove offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a protocol offload handler that was previously added to the
+ *	kernel offload handlers by dev_add_offload(). The passed &offload_type
+ *	is removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *      The packet type might still be in use by receivers
+ *	and must not be freed until after all the CPU's have gone
+ *	through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+	struct list_head *head = &offload_base;
+	struct packet_offload *po1;
+
+	spin_lock(&ptype_lock);
+
+	list_for_each_entry(po1, head, list) {
+		if (po == po1) {
+			list_del_rcu(&po->list);
+			goto out;
+		}
+	}
+
+	pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+	spin_unlock(&ptype_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ *	dev_remove_offload	 - remove packet offload handler
+ *	@po: packet offload declaration
+ *
+ *	Remove a packet offload handler that was previously added to the kernel
+ *	offload handlers by dev_add_offload(). The passed &offload_type is
+ *	removed from the kernel lists and can be freed or reused once this
+ *	function returns.
+ *
+ *	This call sleeps to guarantee that no CPU is looking at the packet
+ *	type after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+	__dev_remove_offload(po);
+
+	synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
 /******************************************************************************
 
 		      Device Boot-time Settings Routines
@@ -6662,6 +6740,8 @@ static int __init net_dev_init(void)
 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&ptype_base[i]);
 
+	INIT_LIST_HEAD(&offload_base);
+
 	if (register_pernet_subsys(&netdev_net_ops))
 		goto out;
 
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH V2 00/14] Always build GSO/GRO functionality into the kernel
From: Vlad Yasevich @ 2012-11-15 18:49 UTC (permalink / raw)
  To: netdev; +Cc: davem, eric.dumazet

This patch series is a revision suggested by Eric to solve the problem where
a host without IPv6 support drops GSO frames from the guest.

The problem is that GSO/GRO support is per protocol, and when said protocol
is not loaded or is disabled, packets attempting to go through GSO/GRO code paths
are dropped.  This causes retransmissions and a two orders of magnitude drop in
performance.

Prior attempt to solve the problem simply enabled enough GSO/GRO functionality
for IPv6 protocol when IPv6 was diabled.  This did not solve the problem when
the protocol was not build in or was blacklisted.
To solve the problem, it was suggested that we separate GSO/GRO callback
registration from packet processing registrations.  That way
GSO/GRO callbacks can be built into the kernel and always be there.
This patch series attempts to do just that.
* Patches 1 and 2 split the GSO/GRO handlers from packet_type and convert
  to the new structure.
* Patches 3, 4 and 5 do the same thing for net_protocol structure.
* The rest of the patches try to incrementally move the IPv6 GSO/GRO
  code out of the module and into the static kernel build.  Some IPv6
  helper functions also had to move as well.
* The last patch just moves the offload callbacks into its own function.

Changes since V1:
  - Removed dev pointer from the packet_offload, since it's not used.
  - Added a erroneousely removed EXPORT_SYMBOL
  - Fix build issues with different states of IPv6 support.

Thanks
-vlad

Vlad Yasevich (14):
  net:  Add generic packet offload infrastructure.
  net:  Switch to using the new packet offload infrustructure
  net: Add net protocol offload registration infrustructure
  ipv6: Add new offload registration infrastructure.
  ipv4: Switch to using the new offload infrastructure.
  ipv6: Switch to using new offload infrastructure.
  ipv6: Separate ipv6 offload support
  ipv6: Separate tcp offload functionality
  ipv6: Separate out UDP offload functionality
  ipv6: Move exthdr offload support into separate file
  ipv6: Update ipv6 static library with newly needed functions
  ipv4: Pull GSO registration out of inet_init()
  ipv6: Pull IPv6 GSO registration out of the module
  net: Remove code duplication between offload structures

 include/linux/netdevice.h  |   20 +++-
 include/net/ip6_checksum.h |   35 ++++++
 include/net/protocol.h     |   31 +++---
 net/core/dev.c             |  107 +++++++++++++++--
 net/ipv4/af_inet.c         |   84 +++++++++----
 net/ipv4/protocol.c        |   21 ++++
 net/ipv6/Makefile          |    7 +-
 net/ipv6/af_inet6.c        |  240 -------------------------------------
 net/ipv6/exthdrs.c         |   52 +--------
 net/ipv6/exthdrs_core.c    |   44 +++++++
 net/ipv6/exthdrs_offload.c |   41 +++++++
 net/ipv6/ip6_offload.c     |  282 ++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_offload.h     |   18 +++
 net/ipv6/ip6_output.c      |   65 ----------
 net/ipv6/output_core.c     |   76 ++++++++++++
 net/ipv6/protocol.c        |   25 ++++
 net/ipv6/tcp_ipv6.c        |  107 +----------------
 net/ipv6/tcpv6_offload.c   |   95 +++++++++++++++
 net/ipv6/udp.c             |   94 ---------------
 net/ipv6/udp_offload.c     |  119 +++++++++++++++++++
 20 files changed, 949 insertions(+), 614 deletions(-)
 create mode 100644 net/ipv6/exthdrs_offload.c
 create mode 100644 net/ipv6/ip6_offload.c
 create mode 100644 net/ipv6/ip6_offload.h
 create mode 100644 net/ipv6/output_core.c
 create mode 100644 net/ipv6/tcpv6_offload.c
 create mode 100644 net/ipv6/udp_offload.c

-- 
1.7.7.6

^ permalink raw reply

* Re: [RFC] tcp: use order-3 pages in tcp_sendmsg()
From: Rick Jones @ 2012-11-15 18:33 UTC (permalink / raw)
  To: Yan, Zheng ; +Cc: Eric Dumazet, netdev
In-Reply-To: <CAAM7YAm+XJKTxJStLMoo4RRV85oogN5wCHfXorkEiUHKqNKHDQ@mail.gmail.com>

On 11/14/2012 11:52 PM, Yan, Zheng wrote:
> This commit makes one of our test case on core 2 machine drop in
> performance by about 60%. The test case runs 2048 instances of
> netperf 64k stream test at the same time.

I'm impressed that 2048 concurrent netperf TCP_STREAM tests ran to 
completion in the first place :)

> Analysis showed using order-3 pages causes more LLC misses, most new
> LLC misses happen when the senders copy data to the socket buffer.

> If revert to use single page, the sender side only trigger a few LLC
>  misses, most LLC misses happen on the receiver size. It means most
> pages allocated by the senders are cache hot. But when using order-3
> pages, 2048 * 32k = 64M, 64M is much larger than LLC size. Should
> this regression be worried? or our test case is too unpractical?

Even before the page change I would have expected the buffers that 
netperf itself uses would have exceeded the LLC.  If you were not using 
test-specific -s and -S options to set an explicit socket buffer size, I 
believe that under Linux (most of the time) the default SO_SNDBUF size 
will be 86KB.  Coupled with your statement that the send size was 64K it 
means the send ring being used by netperf will be 2, 64KB buffers, which 
would then be 256MB across 2048 concurrent netperfs.  Even if we go with 
"only the one send buffer in play at a time matters" that is still 128 
MB of space up in netperf itself even before one gets to the stack.

Still, sharing the analysis tool output might be helpful.

By the way the "default" size of the buffer netperf posts in recv() 
calls will depend on the initial value of SO_RCVBUF after the data 
socket is created and had any -s or -S option values applied to it.

I cannot say that the scripts distributed with netperf are consistently 
good about doing it themselves, but I would suggest for the "canonical" 
bulk streak test something like:

netperf -t TCP_STREAM -H <dest> -l 60 -- -s 1M -S 1M -m 64K -M 64K

as that will reduce the number of variables.  Those -s and -S values 
though will probably call for tweaking sysctl settings or they will be 
clipped by net.core.rmem_max and net.core.wmem_max.  At a minimum I 
would suggest having the -m and -M option.  I might also tack-on a "-o 
all" at the end, but that is a matter of preference - it will cause a 
great deal of output...

Eric Dumazet later says:
> Number of in flight bytes do not depend on the order of the pages, but
> sizes of TCP buffers (receiver, sender)

And unless you happened to use explicit -s and -S options, there is even 
more variability in how much may be inflight.  If you do not add those 
you can at least get netperf to report what the socket buffer sizes 
became by the end of the test:

netperf -t TCP_STREAM ... -- ... -o lss_size_end,rsr_size_end

for "local socket send size" and "remote socket receive size" respectively.

> If the sender is faster (because of this commit), but receiver is slow
> to drain the receive queues, then you can have a situation where the
> consumed memory on receiver is higher and the receiver might be actually
> slower.

Netperf can be told to report the number of receive calls and the bytes 
per receive - either by tacking-on a global "-v 2" or by requesting them 
explicitly via omni output selection.  Presumably, if the receiving 
netserver processes are not keeping-up as well, that should manifest as 
the bytes per receive being larger in the "after" case than the "before" 
case.

netperf ... -- ... -o 
remote_recv_size,remote_recv_calls,remote_bytes_per_recv

happy benchmarking,

rick jones

^ permalink raw reply

* Re: [Xen-devel] [PATCH 0/4] Implement persistent grant in xen-netfront/netback
From: Konrad Rzeszutek Wilk @ 2012-11-15 18:29 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Roger Pau Monne, ANNIE LI, Pasi Kärkkäinen,
	netdev@vger.kernel.org, xen-devel@lists.xensource.com
In-Reply-To: <1352978106.3499.101.camel@zakaz.uk.xensource.com>

On Thu, Nov 15, 2012 at 11:15:06AM +0000, Ian Campbell wrote:
> On Thu, 2012-11-15 at 10:56 +0000, Roger Pau Monne wrote:
> > On 15/11/12 09:38, ANNIE LI wrote:
> > > 
> > > 
> > > On 2012-11-15 15:40, Pasi Kärkkäinen wrote:
> > >> Hello,
> > >>
> > >> On Thu, Nov 15, 2012 at 03:03:07PM +0800, Annie Li wrote:
> > >>> This patch implements persistent grants for xen-netfront/netback. This
> > >>> mechanism maintains page pools in netback/netfront, these page pools is used to
> > >>> save grant pages which are mapped. This way improve performance which is wasted
> > >>> when doing grant operations.
> > >>>
> > >>> Current netback/netfront does map/unmap grant operations frequently when
> > >>> transmitting/receiving packets, and grant operations costs much cpu clock. In
> > >>> this patch, netfront/netback maps grant pages when needed and then saves them
> > >>> into a page pool for future use. All these pages will be unmapped when
> > >>> removing/releasing the net device.
> > >>>
> > >> Do you have performance numbers available already? with/without persistent grants?
> > > I have some simple netperf/netserver test result with/without persistent 
> > > grants,
> > > 
> > > Following is result of with persistent grant patch,
> > > 
> > > Guests, Sum,      Avg,     Min,     Max
> > >   1,  15106.4,  15106.4, 15106.36, 15106.36
> > >   2,  13052.7,  6526.34,  6261.81,  6790.86
> > >   3,  12675.1,  6337.53,  6220.24,  6454.83
> > >   4,  13194,  6596.98,  6274.70,  6919.25
> > > 
> > > 
> > > Following are result of without persistent patch
> > > 
> > > Guests, Sum,     Avg,    Min,        Max
> > >   1,  10864.1,  10864.1, 10864.10, 10864.10
> > >   2,  10898.5,  5449.24,  4862.08,  6036.40
> > >   3,  10734.5,  5367.26,  5261.43,  5473.08
> > >   4,  10924,    5461.99,  5314.84,  5609.14
> > 
> > In the block case, performance improvement is seen when using a large
> > number of guests, could you perform the same benchmark increasing the
> > number of guests to 15?
> 
> It would also be nice to see some analysis of the numbers which justify
> why this change is a good one without every reviewer having to evaluate
> the raw data themselves. In fact this should really be part of the
> commit message.

You mean like a nice graph, eh?

I will run these patches on my 32GB box and see if I can give you
a nice PDF/jpg.

> 
> Ian.
> 

^ permalink raw reply

* EMAIL_barr.colinlee002@hotmail.com
From: Barr. Colin Lee. @ 2012-11-15 18:10 UTC (permalink / raw)





I once again try to notify you as my earlier letter was returned
undelivered. You were bequeathed by my late Client Mr. James Campbell. Do
contact my office.

Barr. Colin Lee.


 EMAIL_barr.colinlee002@hotmail.com

^ permalink raw reply

* [v6 PATCH 7/8] csiostor: Chelsio FCoE offload driver submission (sources part 4).
From: Naresh Kumar Inna @ 2012-11-15 17:11 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1352999484-17812-1-git-send-email-naresh@chelsio.com>

This patch contains code to implement the interrupt handling and the fast
path I/O functionality. The interrupt handling includes allocation of
MSIX vectors, registering and implemeting the interrupt service routines.
The fast path I/O functionality includes posting the I/O request to firmware
via Work Requests, tracking/completing them, and handling task management
requests. SCSI midlayer host template implementation is also covered by
this patch.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
V2:
- Inlined code instead of macro in csio_isr.c
- Use true/false instead if CSIO_TRUE/CSIO_FALSE.
- Use DMA_ defines instead of CSIO_IOREQF_DMA_ defines.
- Replaced large local stack buffer with pre-allocated memory.
- Replaced fast path macros with static functions.
- Removed extra empty lines, needless comment.

V4: Removed needless header file inclusion.

V5:
- Changed over to lockless queuecommand.
- Removed needless header inclusion.
- Re-wrote error paths to reduce lock/unlock ambiguity.

 drivers/scsi/csiostor/csio_isr.c  |  624 +++++++++
 drivers/scsi/csiostor/csio_scsi.c | 2555 +++++++++++++++++++++++++++++++++++++
 2 files changed, 3179 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_isr.c
 create mode 100644 drivers/scsi/csiostor/csio_scsi.c

diff --git a/drivers/scsi/csiostor/csio_isr.c b/drivers/scsi/csiostor/csio_isr.c
new file mode 100644
index 0000000..7ee9777
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_isr.c
@@ -0,0 +1,624 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+
+#include "csio_init.h"
+#include "csio_hw.h"
+
+static irqreturn_t
+csio_nondata_isr(int irq, void *dev_id)
+{
+	struct csio_hw *hw = (struct csio_hw *) dev_id;
+	int rv;
+	unsigned long flags;
+
+	if (unlikely(!hw))
+		return IRQ_NONE;
+
+	if (unlikely(pci_channel_offline(hw->pdev))) {
+		CSIO_INC_STATS(hw, n_pcich_offline);
+		return IRQ_NONE;
+	}
+
+	spin_lock_irqsave(&hw->lock, flags);
+	csio_hw_slow_intr_handler(hw);
+	rv = csio_mb_isr_handler(hw);
+
+	if (rv == 0 && !(hw->flags & CSIO_HWF_FWEVT_PENDING)) {
+		hw->flags |= CSIO_HWF_FWEVT_PENDING;
+		spin_unlock_irqrestore(&hw->lock, flags);
+		schedule_work(&hw->evtq_work);
+		return IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&hw->lock, flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * csio_fwevt_handler - Common FW event handler routine.
+ * @hw: HW module.
+ *
+ * This is the ISR for FW events. It is shared b/w MSIX
+ * and INTx handlers.
+ */
+static void
+csio_fwevt_handler(struct csio_hw *hw)
+{
+	int rv;
+	unsigned long flags;
+
+	rv = csio_fwevtq_handler(hw);
+
+	spin_lock_irqsave(&hw->lock, flags);
+	if (rv == 0 && !(hw->flags & CSIO_HWF_FWEVT_PENDING)) {
+		hw->flags |= CSIO_HWF_FWEVT_PENDING;
+		spin_unlock_irqrestore(&hw->lock, flags);
+		schedule_work(&hw->evtq_work);
+		return;
+	}
+	spin_unlock_irqrestore(&hw->lock, flags);
+
+} /* csio_fwevt_handler */
+
+/*
+ * csio_fwevt_isr() - FW events MSIX ISR
+ * @irq:
+ * @dev_id:
+ *
+ * Process WRs on the FW event queue.
+ *
+ */
+static irqreturn_t
+csio_fwevt_isr(int irq, void *dev_id)
+{
+	struct csio_hw *hw = (struct csio_hw *) dev_id;
+
+	if (unlikely(!hw))
+		return IRQ_NONE;
+
+	if (unlikely(pci_channel_offline(hw->pdev))) {
+		CSIO_INC_STATS(hw, n_pcich_offline);
+		return IRQ_NONE;
+	}
+
+	csio_fwevt_handler(hw);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * csio_fwevt_isr() - INTx wrapper for handling FW events.
+ * @irq:
+ * @dev_id:
+ */
+void
+csio_fwevt_intx_handler(struct csio_hw *hw, void *wr, uint32_t len,
+			   struct csio_fl_dma_buf *flb, void *priv)
+{
+	csio_fwevt_handler(hw);
+} /* csio_fwevt_intx_handler */
+
+/*
+ * csio_process_scsi_cmpl - Process a SCSI WR completion.
+ * @hw: HW module.
+ * @wr: The completed WR from the ingress queue.
+ * @len: Length of the WR.
+ * @flb: Freelist buffer array.
+ *
+ */
+static void
+csio_process_scsi_cmpl(struct csio_hw *hw, void *wr, uint32_t len,
+			struct csio_fl_dma_buf *flb, void *cbfn_q)
+{
+	struct csio_ioreq *ioreq;
+	uint8_t *scsiwr;
+	uint8_t subop;
+	void *cmnd;
+	unsigned long flags;
+
+	ioreq = csio_scsi_cmpl_handler(hw, wr, len, flb, NULL, &scsiwr);
+	if (likely(ioreq)) {
+		if (unlikely(*scsiwr == FW_SCSI_ABRT_CLS_WR)) {
+			subop = FW_SCSI_ABRT_CLS_WR_SUB_OPCODE_GET(
+					((struct fw_scsi_abrt_cls_wr *)
+					    scsiwr)->sub_opcode_to_chk_all_io);
+
+			csio_dbg(hw, "%s cmpl recvd ioreq:%p status:%d\n",
+				    subop ? "Close" : "Abort",
+				    ioreq, ioreq->wr_status);
+
+			spin_lock_irqsave(&hw->lock, flags);
+			if (subop)
+				csio_scsi_closed(ioreq,
+						 (struct list_head *)cbfn_q);
+			else
+				csio_scsi_aborted(ioreq,
+						  (struct list_head *)cbfn_q);
+			/*
+			 * We call scsi_done for I/Os that driver thinks aborts
+			 * have timed out. If there is a race caused by FW
+			 * completing abort at the exact same time that the
+			 * driver has deteced the abort timeout, the following
+			 * check prevents calling of scsi_done twice for the
+			 * same command: once from the eh_abort_handler, another
+			 * from csio_scsi_isr_handler(). This also avoids the
+			 * need to check if csio_scsi_cmnd(req) is NULL in the
+			 * fast path.
+			 */
+			cmnd = csio_scsi_cmnd(ioreq);
+			if (unlikely(cmnd == NULL))
+				list_del_init(&ioreq->sm.sm_list);
+
+			spin_unlock_irqrestore(&hw->lock, flags);
+
+			if (unlikely(cmnd == NULL))
+				csio_put_scsi_ioreq_lock(hw,
+						csio_hw_to_scsim(hw), ioreq);
+		} else {
+			spin_lock_irqsave(&hw->lock, flags);
+			csio_scsi_completed(ioreq, (struct list_head *)cbfn_q);
+			spin_unlock_irqrestore(&hw->lock, flags);
+		}
+	}
+}
+
+/*
+ * csio_scsi_isr_handler() - Common SCSI ISR handler.
+ * @iq: Ingress queue pointer.
+ *
+ * Processes SCSI completions on the SCSI IQ indicated by scm->iq_idx
+ * by calling csio_wr_process_iq_idx. If there are completions on the
+ * isr_cbfn_q, yank them out into a local queue and call their io_cbfns.
+ * Once done, add these completions onto the freelist.
+ * This routine is shared b/w MSIX and INTx.
+ */
+static inline irqreturn_t
+csio_scsi_isr_handler(struct csio_q *iq)
+{
+	struct csio_hw *hw = (struct csio_hw *)iq->owner;
+	LIST_HEAD(cbfn_q);
+	struct list_head *tmp;
+	struct csio_scsim *scm;
+	struct csio_ioreq *ioreq;
+	int isr_completions = 0;
+
+	scm = csio_hw_to_scsim(hw);
+
+	if (unlikely(csio_wr_process_iq(hw, iq, csio_process_scsi_cmpl,
+					&cbfn_q) != 0))
+		return IRQ_NONE;
+
+	/* Call back the completion routines */
+	list_for_each(tmp, &cbfn_q) {
+		ioreq = (struct csio_ioreq *)tmp;
+		isr_completions++;
+		ioreq->io_cbfn(hw, ioreq);
+		/* Release ddp buffer if used for this req */
+		if (unlikely(ioreq->dcopy))
+			csio_put_scsi_ddp_list_lock(hw, scm, &ioreq->gen_list,
+						    ioreq->nsge);
+	}
+
+	if (isr_completions) {
+		/* Return the ioreqs back to ioreq->freelist */
+		csio_put_scsi_ioreq_list_lock(hw, scm, &cbfn_q,
+					      isr_completions);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * csio_scsi_isr() - SCSI MSIX handler
+ * @irq:
+ * @dev_id:
+ *
+ * This is the top level SCSI MSIX handler. Calls csio_scsi_isr_handler()
+ * for handling SCSI completions.
+ */
+static irqreturn_t
+csio_scsi_isr(int irq, void *dev_id)
+{
+	struct csio_q *iq = (struct csio_q *) dev_id;
+	struct csio_hw *hw;
+
+	if (unlikely(!iq))
+		return IRQ_NONE;
+
+	hw = (struct csio_hw *)iq->owner;
+
+	if (unlikely(pci_channel_offline(hw->pdev))) {
+		CSIO_INC_STATS(hw, n_pcich_offline);
+		return IRQ_NONE;
+	}
+
+	csio_scsi_isr_handler(iq);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * csio_scsi_intx_handler() - SCSI INTx handler
+ * @irq:
+ * @dev_id:
+ *
+ * This is the top level SCSI INTx handler. Calls csio_scsi_isr_handler()
+ * for handling SCSI completions.
+ */
+void
+csio_scsi_intx_handler(struct csio_hw *hw, void *wr, uint32_t len,
+			struct csio_fl_dma_buf *flb, void *priv)
+{
+	struct csio_q *iq = priv;
+
+	csio_scsi_isr_handler(iq);
+
+} /* csio_scsi_intx_handler */
+
+/*
+ * csio_fcoe_isr() - INTx/MSI interrupt service routine for FCoE.
+ * @irq:
+ * @dev_id:
+ *
+ *
+ */
+static irqreturn_t
+csio_fcoe_isr(int irq, void *dev_id)
+{
+	struct csio_hw *hw = (struct csio_hw *) dev_id;
+	struct csio_q *intx_q = NULL;
+	int rv;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
+
+	if (unlikely(!hw))
+		return IRQ_NONE;
+
+	if (unlikely(pci_channel_offline(hw->pdev))) {
+		CSIO_INC_STATS(hw, n_pcich_offline);
+		return IRQ_NONE;
+	}
+
+	/* Disable the interrupt for this PCI function. */
+	if (hw->intr_mode == CSIO_IM_INTX)
+		csio_wr_reg32(hw, 0, MYPF_REG(PCIE_PF_CLI));
+
+	/*
+	 * The read in the following function will flush the
+	 * above write.
+	 */
+	if (csio_hw_slow_intr_handler(hw))
+		ret = IRQ_HANDLED;
+
+	/* Get the INTx Forward interrupt IQ. */
+	intx_q = csio_get_q(hw, hw->intr_iq_idx);
+
+	CSIO_DB_ASSERT(intx_q);
+
+	/* IQ handler is not possible for intx_q, hence pass in NULL */
+	if (likely(csio_wr_process_iq(hw, intx_q, NULL, NULL) == 0))
+		ret = IRQ_HANDLED;
+
+	spin_lock_irqsave(&hw->lock, flags);
+	rv = csio_mb_isr_handler(hw);
+	if (rv == 0 && !(hw->flags & CSIO_HWF_FWEVT_PENDING)) {
+		hw->flags |= CSIO_HWF_FWEVT_PENDING;
+		spin_unlock_irqrestore(&hw->lock, flags);
+		schedule_work(&hw->evtq_work);
+		return IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&hw->lock, flags);
+
+	return ret;
+}
+
+static void
+csio_add_msix_desc(struct csio_hw *hw)
+{
+	int i;
+	struct csio_msix_entries *entryp = &hw->msix_entries[0];
+	int k = CSIO_EXTRA_VECS;
+	int len = sizeof(entryp->desc) - 1;
+	int cnt = hw->num_sqsets + k;
+
+	/* Non-data vector */
+	memset(entryp->desc, 0, len + 1);
+	snprintf(entryp->desc, len, "csio-%02x:%02x:%x-nondata",
+		 CSIO_PCI_BUS(hw), CSIO_PCI_DEV(hw), CSIO_PCI_FUNC(hw));
+
+	entryp++;
+	memset(entryp->desc, 0, len + 1);
+	snprintf(entryp->desc, len, "csio-%02x:%02x:%x-fwevt",
+		 CSIO_PCI_BUS(hw), CSIO_PCI_DEV(hw), CSIO_PCI_FUNC(hw));
+	entryp++;
+
+	/* Name SCSI vecs */
+	for (i = k; i < cnt; i++, entryp++) {
+		memset(entryp->desc, 0, len + 1);
+		snprintf(entryp->desc, len, "csio-%02x:%02x:%x-scsi%d",
+			 CSIO_PCI_BUS(hw), CSIO_PCI_DEV(hw),
+			 CSIO_PCI_FUNC(hw), i - CSIO_EXTRA_VECS);
+	}
+}
+
+int
+csio_request_irqs(struct csio_hw *hw)
+{
+	int rv, i, j, k = 0;
+	struct csio_msix_entries *entryp = &hw->msix_entries[0];
+	struct csio_scsi_cpu_info *info;
+
+	if (hw->intr_mode != CSIO_IM_MSIX) {
+		rv = request_irq(hw->pdev->irq, csio_fcoe_isr,
+					(hw->intr_mode == CSIO_IM_MSI) ?
+							0 : IRQF_SHARED,
+					KBUILD_MODNAME, hw);
+		if (rv) {
+			if (hw->intr_mode == CSIO_IM_MSI)
+				pci_disable_msi(hw->pdev);
+			csio_err(hw, "Failed to allocate interrupt line.\n");
+			return -EINVAL;
+		}
+
+		goto out;
+	}
+
+	/* Add the MSIX vector descriptions */
+	csio_add_msix_desc(hw);
+
+	rv = request_irq(entryp[k].vector, csio_nondata_isr, 0,
+			 entryp[k].desc, hw);
+	if (rv) {
+		csio_err(hw, "IRQ request failed for vec %d err:%d\n",
+			 entryp[k].vector, rv);
+		goto err;
+	}
+
+	entryp[k++].dev_id = (void *)hw;
+
+	rv = request_irq(entryp[k].vector, csio_fwevt_isr, 0,
+			 entryp[k].desc, hw);
+	if (rv) {
+		csio_err(hw, "IRQ request failed for vec %d err:%d\n",
+			 entryp[k].vector, rv);
+		goto err;
+	}
+
+	entryp[k++].dev_id = (void *)hw;
+
+	/* Allocate IRQs for SCSI */
+	for (i = 0; i < hw->num_pports; i++) {
+		info = &hw->scsi_cpu_info[i];
+		for (j = 0; j < info->max_cpus; j++, k++) {
+			struct csio_scsi_qset *sqset = &hw->sqset[i][j];
+			struct csio_q *q = hw->wrm.q_arr[sqset->iq_idx];
+
+			rv = request_irq(entryp[k].vector, csio_scsi_isr, 0,
+					 entryp[k].desc, q);
+			if (rv) {
+				csio_err(hw,
+				       "IRQ request failed for vec %d err:%d\n",
+				       entryp[k].vector, rv);
+				goto err;
+			}
+
+			entryp[k].dev_id = (void *)q;
+
+		} /* for all scsi cpus */
+	} /* for all ports */
+
+out:
+	hw->flags |= CSIO_HWF_HOST_INTR_ENABLED;
+
+	return 0;
+
+err:
+	for (i = 0; i < k; i++) {
+		entryp = &hw->msix_entries[i];
+		free_irq(entryp->vector, entryp->dev_id);
+	}
+	pci_disable_msix(hw->pdev);
+
+	return -EINVAL;
+}
+
+static void
+csio_disable_msix(struct csio_hw *hw, bool free)
+{
+	int i;
+	struct csio_msix_entries *entryp;
+	int cnt = hw->num_sqsets + CSIO_EXTRA_VECS;
+
+	if (free) {
+		for (i = 0; i < cnt; i++) {
+			entryp = &hw->msix_entries[i];
+			free_irq(entryp->vector, entryp->dev_id);
+		}
+	}
+	pci_disable_msix(hw->pdev);
+}
+
+/* Reduce per-port max possible CPUs */
+static void
+csio_reduce_sqsets(struct csio_hw *hw, int cnt)
+{
+	int i;
+	struct csio_scsi_cpu_info *info;
+
+	while (cnt < hw->num_sqsets) {
+		for (i = 0; i < hw->num_pports; i++) {
+			info = &hw->scsi_cpu_info[i];
+			if (info->max_cpus > 1) {
+				info->max_cpus--;
+				hw->num_sqsets--;
+				if (hw->num_sqsets <= cnt)
+					break;
+			}
+		}
+	}
+
+	csio_dbg(hw, "Reduced sqsets to %d\n", hw->num_sqsets);
+}
+
+static int
+csio_enable_msix(struct csio_hw *hw)
+{
+	int rv, i, j, k, n, min, cnt;
+	struct csio_msix_entries *entryp;
+	struct msix_entry *entries;
+	int extra = CSIO_EXTRA_VECS;
+	struct csio_scsi_cpu_info *info;
+
+	min = hw->num_pports + extra;
+	cnt = hw->num_sqsets + extra;
+
+	/* Max vectors required based on #niqs configured in fw */
+	if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS || !csio_is_hw_master(hw))
+		cnt = min_t(uint8_t, hw->cfg_niq, cnt);
+
+	entries = kzalloc(sizeof(struct msix_entry) * cnt, GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < cnt; i++)
+		entries[i].entry = (uint16_t)i;
+
+	csio_dbg(hw, "FW supp #niq:%d, trying %d msix's\n", hw->cfg_niq, cnt);
+
+	while ((rv = pci_enable_msix(hw->pdev, entries, cnt)) >= min)
+		cnt = rv;
+	if (!rv) {
+		if (cnt < (hw->num_sqsets + extra)) {
+			csio_dbg(hw, "Reducing sqsets to %d\n", cnt - extra);
+			csio_reduce_sqsets(hw, cnt - extra);
+		}
+	} else {
+		if (rv > 0) {
+			pci_disable_msix(hw->pdev);
+			csio_info(hw, "Not using MSI-X, remainder:%d\n", rv);
+		}
+
+		kfree(entries);
+		return -ENOMEM;
+	}
+
+	/* Save off vectors */
+	for (i = 0; i < cnt; i++) {
+		entryp = &hw->msix_entries[i];
+		entryp->vector = entries[i].vector;
+	}
+
+	/* Distribute vectors */
+	k = 0;
+	csio_set_nondata_intr_idx(hw, entries[k].entry);
+	csio_set_mb_intr_idx(csio_hw_to_mbm(hw), entries[k++].entry);
+	csio_set_fwevt_intr_idx(hw, entries[k++].entry);
+
+	for (i = 0; i < hw->num_pports; i++) {
+		info = &hw->scsi_cpu_info[i];
+
+		for (j = 0; j < hw->num_scsi_msix_cpus; j++) {
+			n = (j % info->max_cpus) +  k;
+			hw->sqset[i][j].intr_idx = entries[n].entry;
+		}
+
+		k += info->max_cpus;
+	}
+
+	kfree(entries);
+	return 0;
+}
+
+void
+csio_intr_enable(struct csio_hw *hw)
+{
+	hw->intr_mode = CSIO_IM_NONE;
+	hw->flags &= ~CSIO_HWF_HOST_INTR_ENABLED;
+
+	/* Try MSIX, then MSI or fall back to INTx */
+	if ((csio_msi == 2) && !csio_enable_msix(hw))
+		hw->intr_mode = CSIO_IM_MSIX;
+	else {
+		/* Max iqs required based on #niqs configured in fw */
+		if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS ||
+			!csio_is_hw_master(hw)) {
+			int extra = CSIO_EXTRA_MSI_IQS;
+
+			if (hw->cfg_niq < (hw->num_sqsets + extra)) {
+				csio_dbg(hw, "Reducing sqsets to %d\n",
+					 hw->cfg_niq - extra);
+				csio_reduce_sqsets(hw, hw->cfg_niq - extra);
+			}
+		}
+
+		if ((csio_msi == 1) && !pci_enable_msi(hw->pdev))
+			hw->intr_mode = CSIO_IM_MSI;
+		else
+			hw->intr_mode = CSIO_IM_INTX;
+	}
+
+	csio_dbg(hw, "Using %s interrupt mode.\n",
+		(hw->intr_mode == CSIO_IM_MSIX) ? "MSIX" :
+		((hw->intr_mode == CSIO_IM_MSI) ? "MSI" : "INTx"));
+}
+
+void
+csio_intr_disable(struct csio_hw *hw, bool free)
+{
+	csio_hw_intr_disable(hw);
+
+	switch (hw->intr_mode) {
+	case CSIO_IM_MSIX:
+		csio_disable_msix(hw, free);
+		break;
+	case CSIO_IM_MSI:
+		if (free)
+			free_irq(hw->pdev->irq, hw);
+		pci_disable_msi(hw->pdev);
+		break;
+	case CSIO_IM_INTX:
+		if (free)
+			free_irq(hw->pdev->irq, hw);
+		break;
+	default:
+		break;
+	}
+	hw->intr_mode = CSIO_IM_NONE;
+	hw->flags &= ~CSIO_HWF_HOST_INTR_ENABLED;
+}
diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c
new file mode 100644
index 0000000..fdbd7da
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_scsi.c
@@ -0,0 +1,2555 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <asm/page.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_transport_fc.h>
+
+#include "csio_hw.h"
+#include "csio_lnode.h"
+#include "csio_rnode.h"
+#include "csio_scsi.h"
+#include "csio_init.h"
+
+int csio_scsi_eqsize = 65536;
+int csio_scsi_iqlen = 128;
+int csio_scsi_ioreqs = 2048;
+uint32_t csio_max_scan_tmo;
+uint32_t csio_delta_scan_tmo = 5;
+int csio_lun_qdepth = 32;
+
+static int csio_ddp_descs = 128;
+
+static int csio_do_abrt_cls(struct csio_hw *,
+				      struct csio_ioreq *, bool);
+
+static void csio_scsis_uninit(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_io_active(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_tm_active(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_aborting(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_closing(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_shost_cmpl_await(struct csio_ioreq *, enum csio_scsi_ev);
+
+/*
+ * csio_scsi_match_io - Match an ioreq with the given SCSI level data.
+ * @ioreq: The I/O request
+ * @sld: Level information
+ *
+ * Should be called with lock held.
+ *
+ */
+static bool
+csio_scsi_match_io(struct csio_ioreq *ioreq, struct csio_scsi_level_data *sld)
+{
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(ioreq);
+
+	switch (sld->level) {
+	case CSIO_LEV_LUN:
+		if (scmnd == NULL)
+			return false;
+
+		return ((ioreq->lnode == sld->lnode) &&
+			(ioreq->rnode == sld->rnode) &&
+			((uint64_t)scmnd->device->lun == sld->oslun));
+
+	case CSIO_LEV_RNODE:
+		return ((ioreq->lnode == sld->lnode) &&
+				(ioreq->rnode == sld->rnode));
+	case CSIO_LEV_LNODE:
+		return (ioreq->lnode == sld->lnode);
+	case CSIO_LEV_ALL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * csio_scsi_gather_active_ios - Gather active I/Os based on level
+ * @scm: SCSI module
+ * @sld: Level information
+ * @dest: The queue where these I/Os have to be gathered.
+ *
+ * Should be called with lock held.
+ */
+static void
+csio_scsi_gather_active_ios(struct csio_scsim *scm,
+			    struct csio_scsi_level_data *sld,
+			    struct list_head *dest)
+{
+	struct list_head *tmp, *next;
+
+	if (list_empty(&scm->active_q))
+		return;
+
+	/* Just splice the entire active_q into dest */
+	if (sld->level == CSIO_LEV_ALL) {
+		list_splice_tail_init(&scm->active_q, dest);
+		return;
+	}
+
+	list_for_each_safe(tmp, next, &scm->active_q) {
+		if (csio_scsi_match_io((struct csio_ioreq *)tmp, sld)) {
+			list_del_init(tmp);
+			list_add_tail(tmp, dest);
+		}
+	}
+}
+
+static inline bool
+csio_scsi_itnexus_loss_error(uint16_t error)
+{
+	switch (error) {
+	case FW_ERR_LINK_DOWN:
+	case FW_RDEV_NOT_READY:
+	case FW_ERR_RDEV_LOST:
+	case FW_ERR_RDEV_LOGO:
+	case FW_ERR_RDEV_IMPL_LOGO:
+		return 1;
+	}
+	return 0;
+}
+
+static inline void
+csio_scsi_tag(struct scsi_cmnd *scmnd, uint8_t *tag, uint8_t hq,
+	      uint8_t oq, uint8_t sq)
+{
+	char stag[2];
+
+	if (scsi_populate_tag_msg(scmnd, stag)) {
+		switch (stag[0]) {
+		case HEAD_OF_QUEUE_TAG:
+			*tag = hq;
+			break;
+		case ORDERED_QUEUE_TAG:
+			*tag = oq;
+			break;
+		default:
+			*tag = sq;
+			break;
+		}
+	} else
+		*tag = 0;
+}
+
+/*
+ * csio_scsi_fcp_cmnd - Frame the SCSI FCP command paylod.
+ * @req: IO req structure.
+ * @addr: DMA location to place the payload.
+ *
+ * This routine is shared between FCP_WRITE, FCP_READ and FCP_CMD requests.
+ */
+static inline void
+csio_scsi_fcp_cmnd(struct csio_ioreq *req, void *addr)
+{
+	struct fcp_cmnd *fcp_cmnd = (struct fcp_cmnd *)addr;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+
+	/* Check for Task Management */
+	if (likely(scmnd->SCp.Message == 0)) {
+		int_to_scsilun(scmnd->device->lun, &fcp_cmnd->fc_lun);
+		fcp_cmnd->fc_tm_flags = 0;
+		fcp_cmnd->fc_cmdref = 0;
+		fcp_cmnd->fc_pri_ta = 0;
+
+		memcpy(fcp_cmnd->fc_cdb, scmnd->cmnd, 16);
+		csio_scsi_tag(scmnd, &fcp_cmnd->fc_pri_ta,
+			      FCP_PTA_HEADQ, FCP_PTA_ORDERED, FCP_PTA_SIMPLE);
+		fcp_cmnd->fc_dl = cpu_to_be32(scsi_bufflen(scmnd));
+
+		if (req->nsge)
+			if (req->datadir == DMA_TO_DEVICE)
+				fcp_cmnd->fc_flags = FCP_CFL_WRDATA;
+			else
+				fcp_cmnd->fc_flags = FCP_CFL_RDDATA;
+		else
+			fcp_cmnd->fc_flags = 0;
+	} else {
+		memset(fcp_cmnd, 0, sizeof(*fcp_cmnd));
+		int_to_scsilun(scmnd->device->lun, &fcp_cmnd->fc_lun);
+		fcp_cmnd->fc_tm_flags = (uint8_t)scmnd->SCp.Message;
+	}
+}
+
+/*
+ * csio_scsi_init_cmd_wr - Initialize the SCSI CMD WR.
+ * @req: IO req structure.
+ * @addr: DMA location to place the payload.
+ * @size: Size of WR (including FW WR + immed data + rsp SG entry
+ *
+ * Wrapper for populating fw_scsi_cmd_wr.
+ */
+static inline void
+csio_scsi_init_cmd_wr(struct csio_ioreq *req, void *addr, uint32_t size)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_rnode *rn = req->rnode;
+	struct fw_scsi_cmd_wr *wr = (struct fw_scsi_cmd_wr *)addr;
+	struct csio_dma_buf *dma_buf;
+	uint8_t imm = csio_hw_to_scsim(hw)->proto_cmd_len;
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP(FW_SCSI_CMD_WR) |
+					  FW_SCSI_CMD_WR_IMMDLEN(imm));
+	wr->flowid_len16 = cpu_to_be32(FW_WR_FLOWID(rn->flowid) |
+					    FW_WR_LEN16(
+						DIV_ROUND_UP(size, 16)));
+
+	wr->cookie = (uintptr_t) req;
+	wr->iqid = (uint16_t)cpu_to_be16(csio_q_physiqid(hw, req->iq_idx));
+	wr->tmo_val = (uint8_t) req->tmo;
+	wr->r3 = 0;
+	memset(&wr->r5, 0, 8);
+
+	/* Get RSP DMA buffer */
+	dma_buf = &req->dma_buf;
+
+	/* Prepare RSP SGL */
+	wr->rsp_dmalen = cpu_to_be32(dma_buf->len);
+	wr->rsp_dmaaddr = cpu_to_be64(dma_buf->paddr);
+
+	wr->r6 = 0;
+
+	wr->u.fcoe.ctl_pri = 0;
+	wr->u.fcoe.cp_en_class = 0;
+	wr->u.fcoe.r4_lo[0] = 0;
+	wr->u.fcoe.r4_lo[1] = 0;
+
+	/* Frame a FCP command */
+	csio_scsi_fcp_cmnd(req, (void *)((uintptr_t)addr +
+				    sizeof(struct fw_scsi_cmd_wr)));
+}
+
+#define CSIO_SCSI_CMD_WR_SZ(_imm)					\
+	(sizeof(struct fw_scsi_cmd_wr) +		/* WR size */	\
+	 ALIGN((_imm), 16))				/* Immed data */
+
+#define CSIO_SCSI_CMD_WR_SZ_16(_imm)					\
+			(ALIGN(CSIO_SCSI_CMD_WR_SZ((_imm)), 16))
+
+/*
+ * csio_scsi_cmd - Create a SCSI CMD WR.
+ * @req: IO req structure.
+ *
+ * Gets a WR slot in the ingress queue and initializes it with SCSI CMD WR.
+ *
+ */
+static inline void
+csio_scsi_cmd(struct csio_ioreq *req)
+{
+	struct csio_wr_pair wrp;
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	uint32_t size = CSIO_SCSI_CMD_WR_SZ_16(scsim->proto_cmd_len);
+
+	req->drv_status = csio_wr_get(hw, req->eq_idx, size, &wrp);
+	if (unlikely(req->drv_status != 0))
+		return;
+
+	if (wrp.size1 >= size) {
+		/* Initialize WR in one shot */
+		csio_scsi_init_cmd_wr(req, wrp.addr1, size);
+	} else {
+		uint8_t *tmpwr = csio_q_eq_wrap(hw, req->eq_idx);
+
+		/*
+		 * Make a temporary copy of the WR and write back
+		 * the copy into the WR pair.
+		 */
+		csio_scsi_init_cmd_wr(req, (void *)tmpwr, size);
+		memcpy(wrp.addr1, tmpwr, wrp.size1);
+		memcpy(wrp.addr2, tmpwr + wrp.size1, size - wrp.size1);
+	}
+}
+
+/*
+ * csio_scsi_init_ulptx_dsgl - Fill in a ULP_TX_SC_DSGL
+ * @hw: HW module
+ * @req: IO request
+ * @sgl: ULP TX SGL pointer.
+ *
+ */
+static inline void
+csio_scsi_init_ultptx_dsgl(struct csio_hw *hw, struct csio_ioreq *req,
+			   struct ulptx_sgl *sgl)
+{
+	struct ulptx_sge_pair *sge_pair = NULL;
+	struct scatterlist *sgel;
+	uint32_t i = 0;
+	uint32_t xfer_len;
+	struct list_head *tmp;
+	struct csio_dma_buf *dma_buf;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+
+	sgl->cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) | ULPTX_MORE |
+				     ULPTX_NSGE(req->nsge));
+	/* Now add the data SGLs */
+	if (likely(!req->dcopy)) {
+		scsi_for_each_sg(scmnd, sgel, req->nsge, i) {
+			if (i == 0) {
+				sgl->addr0 = cpu_to_be64(sg_dma_address(sgel));
+				sgl->len0 = cpu_to_be32(sg_dma_len(sgel));
+				sge_pair = (struct ulptx_sge_pair *)(sgl + 1);
+				continue;
+			}
+			if ((i - 1) & 0x1) {
+				sge_pair->addr[1] = cpu_to_be64(
+							sg_dma_address(sgel));
+				sge_pair->len[1] = cpu_to_be32(
+							sg_dma_len(sgel));
+				sge_pair++;
+			} else {
+				sge_pair->addr[0] = cpu_to_be64(
+							sg_dma_address(sgel));
+				sge_pair->len[0] = cpu_to_be32(
+							sg_dma_len(sgel));
+			}
+		}
+	} else {
+		/* Program sg elements with driver's DDP buffer */
+		xfer_len = scsi_bufflen(scmnd);
+		list_for_each(tmp, &req->gen_list) {
+			dma_buf = (struct csio_dma_buf *)tmp;
+			if (i == 0) {
+				sgl->addr0 = cpu_to_be64(dma_buf->paddr);
+				sgl->len0 = cpu_to_be32(
+						min(xfer_len, dma_buf->len));
+				sge_pair = (struct ulptx_sge_pair *)(sgl + 1);
+			} else if ((i - 1) & 0x1) {
+				sge_pair->addr[1] = cpu_to_be64(dma_buf->paddr);
+				sge_pair->len[1] = cpu_to_be32(
+						min(xfer_len, dma_buf->len));
+				sge_pair++;
+			} else {
+				sge_pair->addr[0] = cpu_to_be64(dma_buf->paddr);
+				sge_pair->len[0] = cpu_to_be32(
+						min(xfer_len, dma_buf->len));
+			}
+			xfer_len -= min(xfer_len, dma_buf->len);
+			i++;
+		}
+	}
+}
+
+/*
+ * csio_scsi_init_read_wr - Initialize the READ SCSI WR.
+ * @req: IO req structure.
+ * @wrp: DMA location to place the payload.
+ * @size: Size of WR (including FW WR + immed data + rsp SG entry + data SGL
+ *
+ * Wrapper for populating fw_scsi_read_wr.
+ */
+static inline void
+csio_scsi_init_read_wr(struct csio_ioreq *req, void *wrp, uint32_t size)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_rnode *rn = req->rnode;
+	struct fw_scsi_read_wr *wr = (struct fw_scsi_read_wr *)wrp;
+	struct ulptx_sgl *sgl;
+	struct csio_dma_buf *dma_buf;
+	uint8_t imm = csio_hw_to_scsim(hw)->proto_cmd_len;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP(FW_SCSI_READ_WR) |
+				     FW_SCSI_READ_WR_IMMDLEN(imm));
+	wr->flowid_len16 = cpu_to_be32(FW_WR_FLOWID(rn->flowid) |
+				       FW_WR_LEN16(DIV_ROUND_UP(size, 16)));
+	wr->cookie = (uintptr_t)req;
+	wr->iqid = (uint16_t)cpu_to_be16(csio_q_physiqid(hw, req->iq_idx));
+	wr->tmo_val = (uint8_t)(req->tmo);
+	wr->use_xfer_cnt = 1;
+	wr->xfer_cnt = cpu_to_be32(scsi_bufflen(scmnd));
+	wr->ini_xfer_cnt = cpu_to_be32(scsi_bufflen(scmnd));
+	/* Get RSP DMA buffer */
+	dma_buf = &req->dma_buf;
+
+	/* Prepare RSP SGL */
+	wr->rsp_dmalen = cpu_to_be32(dma_buf->len);
+	wr->rsp_dmaaddr = cpu_to_be64(dma_buf->paddr);
+
+	wr->r4 = 0;
+
+	wr->u.fcoe.ctl_pri = 0;
+	wr->u.fcoe.cp_en_class = 0;
+	wr->u.fcoe.r3_lo[0] = 0;
+	wr->u.fcoe.r3_lo[1] = 0;
+	csio_scsi_fcp_cmnd(req, (void *)((uintptr_t)wrp +
+					sizeof(struct fw_scsi_read_wr)));
+
+	/* Move WR pointer past command and immediate data */
+	sgl = (struct ulptx_sgl *)((uintptr_t)wrp +
+			      sizeof(struct fw_scsi_read_wr) + ALIGN(imm, 16));
+
+	/* Fill in the DSGL */
+	csio_scsi_init_ultptx_dsgl(hw, req, sgl);
+}
+
+/*
+ * csio_scsi_init_write_wr - Initialize the WRITE SCSI WR.
+ * @req: IO req structure.
+ * @wrp: DMA location to place the payload.
+ * @size: Size of WR (including FW WR + immed data + rsp SG entry + data SGL
+ *
+ * Wrapper for populating fw_scsi_write_wr.
+ */
+static inline void
+csio_scsi_init_write_wr(struct csio_ioreq *req, void *wrp, uint32_t size)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_rnode *rn = req->rnode;
+	struct fw_scsi_write_wr *wr = (struct fw_scsi_write_wr *)wrp;
+	struct ulptx_sgl *sgl;
+	struct csio_dma_buf *dma_buf;
+	uint8_t imm = csio_hw_to_scsim(hw)->proto_cmd_len;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP(FW_SCSI_WRITE_WR) |
+				     FW_SCSI_WRITE_WR_IMMDLEN(imm));
+	wr->flowid_len16 = cpu_to_be32(FW_WR_FLOWID(rn->flowid) |
+				       FW_WR_LEN16(DIV_ROUND_UP(size, 16)));
+	wr->cookie = (uintptr_t)req;
+	wr->iqid = (uint16_t)cpu_to_be16(csio_q_physiqid(hw, req->iq_idx));
+	wr->tmo_val = (uint8_t)(req->tmo);
+	wr->use_xfer_cnt = 1;
+	wr->xfer_cnt = cpu_to_be32(scsi_bufflen(scmnd));
+	wr->ini_xfer_cnt = cpu_to_be32(scsi_bufflen(scmnd));
+	/* Get RSP DMA buffer */
+	dma_buf = &req->dma_buf;
+
+	/* Prepare RSP SGL */
+	wr->rsp_dmalen = cpu_to_be32(dma_buf->len);
+	wr->rsp_dmaaddr = cpu_to_be64(dma_buf->paddr);
+
+	wr->r4 = 0;
+
+	wr->u.fcoe.ctl_pri = 0;
+	wr->u.fcoe.cp_en_class = 0;
+	wr->u.fcoe.r3_lo[0] = 0;
+	wr->u.fcoe.r3_lo[1] = 0;
+	csio_scsi_fcp_cmnd(req, (void *)((uintptr_t)wrp +
+					sizeof(struct fw_scsi_write_wr)));
+
+	/* Move WR pointer past command and immediate data */
+	sgl = (struct ulptx_sgl *)((uintptr_t)wrp +
+			      sizeof(struct fw_scsi_write_wr) + ALIGN(imm, 16));
+
+	/* Fill in the DSGL */
+	csio_scsi_init_ultptx_dsgl(hw, req, sgl);
+}
+
+/* Calculate WR size needed for fw_scsi_read_wr/fw_scsi_write_wr */
+#define CSIO_SCSI_DATA_WRSZ(req, oper, sz, imm)				       \
+do {									       \
+	(sz) = sizeof(struct fw_scsi_##oper##_wr) +	/* WR size */          \
+	       ALIGN((imm), 16) +			/* Immed data */       \
+	       sizeof(struct ulptx_sgl);		/* ulptx_sgl */	       \
+									       \
+	if (unlikely((req)->nsge > 1))				               \
+		(sz) += (sizeof(struct ulptx_sge_pair) *		       \
+				(ALIGN(((req)->nsge - 1), 2) / 2));            \
+							/* Data SGE */	       \
+} while (0)
+
+/*
+ * csio_scsi_read - Create a SCSI READ WR.
+ * @req: IO req structure.
+ *
+ * Gets a WR slot in the ingress queue and initializes it with
+ * SCSI READ WR.
+ *
+ */
+static inline void
+csio_scsi_read(struct csio_ioreq *req)
+{
+	struct csio_wr_pair wrp;
+	uint32_t size;
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+
+	CSIO_SCSI_DATA_WRSZ(req, read, size, scsim->proto_cmd_len);
+	size = ALIGN(size, 16);
+
+	req->drv_status = csio_wr_get(hw, req->eq_idx, size, &wrp);
+	if (likely(req->drv_status == 0)) {
+		if (likely(wrp.size1 >= size)) {
+			/* Initialize WR in one shot */
+			csio_scsi_init_read_wr(req, wrp.addr1, size);
+		} else {
+			uint8_t *tmpwr = csio_q_eq_wrap(hw, req->eq_idx);
+			/*
+			 * Make a temporary copy of the WR and write back
+			 * the copy into the WR pair.
+			 */
+			csio_scsi_init_read_wr(req, (void *)tmpwr, size);
+			memcpy(wrp.addr1, tmpwr, wrp.size1);
+			memcpy(wrp.addr2, tmpwr + wrp.size1, size - wrp.size1);
+		}
+	}
+}
+
+/*
+ * csio_scsi_write - Create a SCSI WRITE WR.
+ * @req: IO req structure.
+ *
+ * Gets a WR slot in the ingress queue and initializes it with
+ * SCSI WRITE WR.
+ *
+ */
+static inline void
+csio_scsi_write(struct csio_ioreq *req)
+{
+	struct csio_wr_pair wrp;
+	uint32_t size;
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+
+	CSIO_SCSI_DATA_WRSZ(req, write, size, scsim->proto_cmd_len);
+	size = ALIGN(size, 16);
+
+	req->drv_status = csio_wr_get(hw, req->eq_idx, size, &wrp);
+	if (likely(req->drv_status == 0)) {
+		if (likely(wrp.size1 >= size)) {
+			/* Initialize WR in one shot */
+			csio_scsi_init_write_wr(req, wrp.addr1, size);
+		} else {
+			uint8_t *tmpwr = csio_q_eq_wrap(hw, req->eq_idx);
+			/*
+			 * Make a temporary copy of the WR and write back
+			 * the copy into the WR pair.
+			 */
+			csio_scsi_init_write_wr(req, (void *)tmpwr, size);
+			memcpy(wrp.addr1, tmpwr, wrp.size1);
+			memcpy(wrp.addr2, tmpwr + wrp.size1, size - wrp.size1);
+		}
+	}
+}
+
+/*
+ * csio_setup_ddp - Setup DDP buffers for Read request.
+ * @req: IO req structure.
+ *
+ * Checks SGLs/Data buffers are virtually contiguous required for DDP.
+ * If contiguous,driver posts SGLs in the WR otherwise post internal
+ * buffers for such request for DDP.
+ */
+static inline void
+csio_setup_ddp(struct csio_scsim *scsim, struct csio_ioreq *req)
+{
+#ifdef __CSIO_DEBUG__
+	struct csio_hw *hw = req->lnode->hwp;
+#endif
+	struct scatterlist *sgel = NULL;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+	uint64_t sg_addr = 0;
+	uint32_t ddp_pagesz = 4096;
+	uint32_t buf_off;
+	struct csio_dma_buf *dma_buf = NULL;
+	uint32_t alloc_len = 0;
+	uint32_t xfer_len = 0;
+	uint32_t sg_len = 0;
+	uint32_t i;
+
+	scsi_for_each_sg(scmnd, sgel, req->nsge, i) {
+		sg_addr = sg_dma_address(sgel);
+		sg_len	= sg_dma_len(sgel);
+
+		buf_off = sg_addr & (ddp_pagesz - 1);
+
+		/* Except 1st buffer,all buffer addr have to be Page aligned */
+		if (i != 0 && buf_off) {
+			csio_dbg(hw, "SGL addr not DDP aligned (%llx:%d)\n",
+				 sg_addr, sg_len);
+			goto unaligned;
+		}
+
+		/* Except last buffer,all buffer must end on page boundary */
+		if ((i != (req->nsge - 1)) &&
+			((buf_off + sg_len) & (ddp_pagesz - 1))) {
+			csio_dbg(hw,
+				 "SGL addr not ending on page boundary"
+				 "(%llx:%d)\n", sg_addr, sg_len);
+			goto unaligned;
+		}
+	}
+
+	/* SGL's are virtually contiguous. HW will DDP to SGLs */
+	req->dcopy = 0;
+	csio_scsi_read(req);
+
+	return;
+
+unaligned:
+	CSIO_INC_STATS(scsim, n_unaligned);
+	/*
+	 * For unaligned SGLs, driver will allocate internal DDP buffer.
+	 * Once command is completed data from DDP buffer copied to SGLs
+	 */
+	req->dcopy = 1;
+
+	/* Use gen_list to store the DDP buffers */
+	INIT_LIST_HEAD(&req->gen_list);
+	xfer_len = scsi_bufflen(scmnd);
+
+	i = 0;
+	/* Allocate ddp buffers for this request */
+	while (alloc_len < xfer_len) {
+		dma_buf = csio_get_scsi_ddp(scsim);
+		if (dma_buf == NULL || i > scsim->max_sge) {
+			req->drv_status = -EBUSY;
+			break;
+		}
+		alloc_len += dma_buf->len;
+		/* Added to IO req */
+		list_add_tail(&dma_buf->list, &req->gen_list);
+		i++;
+	}
+
+	if (!req->drv_status) {
+		/* set number of ddp bufs used */
+		req->nsge = i;
+		csio_scsi_read(req);
+		return;
+	}
+
+	 /* release dma descs */
+	if (i > 0)
+		csio_put_scsi_ddp_list(scsim, &req->gen_list, i);
+}
+
+/*
+ * csio_scsi_init_abrt_cls_wr - Initialize an ABORT/CLOSE WR.
+ * @req: IO req structure.
+ * @addr: DMA location to place the payload.
+ * @size: Size of WR
+ * @abort: abort OR close
+ *
+ * Wrapper for populating fw_scsi_cmd_wr.
+ */
+static inline void
+csio_scsi_init_abrt_cls_wr(struct csio_ioreq *req, void *addr, uint32_t size,
+			   bool abort)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_rnode *rn = req->rnode;
+	struct fw_scsi_abrt_cls_wr *wr = (struct fw_scsi_abrt_cls_wr *)addr;
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP(FW_SCSI_ABRT_CLS_WR));
+	wr->flowid_len16 = cpu_to_be32(FW_WR_FLOWID(rn->flowid) |
+					    FW_WR_LEN16(
+						DIV_ROUND_UP(size, 16)));
+
+	wr->cookie = (uintptr_t) req;
+	wr->iqid = (uint16_t)cpu_to_be16(csio_q_physiqid(hw, req->iq_idx));
+	wr->tmo_val = (uint8_t) req->tmo;
+	/* 0 for CHK_ALL_IO tells FW to look up t_cookie */
+	wr->sub_opcode_to_chk_all_io =
+				(FW_SCSI_ABRT_CLS_WR_SUB_OPCODE(abort) |
+				 FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO(0));
+	wr->r3[0] = 0;
+	wr->r3[1] = 0;
+	wr->r3[2] = 0;
+	wr->r3[3] = 0;
+	/* Since we re-use the same ioreq for abort as well */
+	wr->t_cookie = (uintptr_t) req;
+}
+
+static inline void
+csio_scsi_abrt_cls(struct csio_ioreq *req, bool abort)
+{
+	struct csio_wr_pair wrp;
+	struct csio_hw *hw = req->lnode->hwp;
+	uint32_t size = ALIGN(sizeof(struct fw_scsi_abrt_cls_wr), 16);
+
+	req->drv_status = csio_wr_get(hw, req->eq_idx, size, &wrp);
+	if (req->drv_status != 0)
+		return;
+
+	if (wrp.size1 >= size) {
+		/* Initialize WR in one shot */
+		csio_scsi_init_abrt_cls_wr(req, wrp.addr1, size, abort);
+	} else {
+		uint8_t *tmpwr = csio_q_eq_wrap(hw, req->eq_idx);
+		/*
+		 * Make a temporary copy of the WR and write back
+		 * the copy into the WR pair.
+		 */
+		csio_scsi_init_abrt_cls_wr(req, (void *)tmpwr, size, abort);
+		memcpy(wrp.addr1, tmpwr, wrp.size1);
+		memcpy(wrp.addr2, tmpwr + wrp.size1, size - wrp.size1);
+	}
+}
+
+/*****************************************************************************/
+/* START: SCSI SM                                                            */
+/*****************************************************************************/
+static void
+csio_scsis_uninit(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+
+	switch (evt) {
+	case CSIO_SCSIE_START_IO:
+
+		if (req->nsge) {
+			if (req->datadir == DMA_TO_DEVICE) {
+				req->dcopy = 0;
+				csio_scsi_write(req);
+			} else
+				csio_setup_ddp(scsim, req);
+		} else {
+			csio_scsi_cmd(req);
+		}
+
+		if (likely(req->drv_status == 0)) {
+			/* change state and enqueue on active_q */
+			csio_set_state(&req->sm, csio_scsis_io_active);
+			list_add_tail(&req->sm.sm_list, &scsim->active_q);
+			csio_wr_issue(hw, req->eq_idx, false);
+			CSIO_INC_STATS(scsim, n_active);
+
+			return;
+		}
+		break;
+
+	case CSIO_SCSIE_START_TM:
+		csio_scsi_cmd(req);
+		if (req->drv_status == 0) {
+			/*
+			 * NOTE: We collect the affected I/Os prior to issuing
+			 * LUN reset, and not after it. This is to prevent
+			 * aborting I/Os that get issued after the LUN reset,
+			 * but prior to LUN reset completion (in the event that
+			 * the host stack has not blocked I/Os to a LUN that is
+			 * being reset.
+			 */
+			csio_set_state(&req->sm, csio_scsis_tm_active);
+			list_add_tail(&req->sm.sm_list, &scsim->active_q);
+			csio_wr_issue(hw, req->eq_idx, false);
+			CSIO_INC_STATS(scsim, n_tm_active);
+		}
+		return;
+
+	case CSIO_SCSIE_ABORT:
+	case CSIO_SCSIE_CLOSE:
+		/*
+		 * NOTE:
+		 * We could get here due to  :
+		 * - a window in the cleanup path of the SCSI module
+		 *   (csio_scsi_abort_io()). Please see NOTE in this function.
+		 * - a window in the time we tried to issue an abort/close
+		 *   of a request to FW, and the FW completed the request
+		 *   itself.
+		 *   Print a message for now, and return INVAL either way.
+		 */
+		req->drv_status = -EINVAL;
+		csio_warn(hw, "Trying to abort/close completed IO:%p!\n", req);
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_io_active(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+	struct csio_rnode *rn;
+
+	switch (evt) {
+	case CSIO_SCSIE_COMPLETED:
+		CSIO_DEC_STATS(scm, n_active);
+		list_del_init(&req->sm.sm_list);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		/*
+		 * In MSIX mode, with multiple queues, the SCSI compeltions
+		 * could reach us sooner than the FW events sent to indicate
+		 * I-T nexus loss (link down, remote device logo etc). We
+		 * dont want to be returning such I/Os to the upper layer
+		 * immediately, since we wouldnt have reported the I-T nexus
+		 * loss itself. This forces us to serialize such completions
+		 * with the reporting of the I-T nexus loss. Therefore, we
+		 * internally queue up such up such completions in the rnode.
+		 * The reporting of I-T nexus loss to the upper layer is then
+		 * followed by the returning of I/Os in this internal queue.
+		 * Having another state alongwith another queue helps us take
+		 * actions for events such as ABORT received while we are
+		 * in this rnode queue.
+		 */
+		if (unlikely(req->wr_status != FW_SUCCESS)) {
+			rn = req->rnode;
+			/*
+			 * FW says remote device is lost, but rnode
+			 * doesnt reflect it.
+			 */
+			if (csio_scsi_itnexus_loss_error(req->wr_status) &&
+						csio_is_rnode_ready(rn)) {
+				csio_set_state(&req->sm,
+						csio_scsis_shost_cmpl_await);
+				list_add_tail(&req->sm.sm_list,
+					      &rn->host_cmpl_q);
+			}
+		}
+
+		break;
+
+	case CSIO_SCSIE_ABORT:
+		csio_scsi_abrt_cls(req, SCSI_ABORT);
+		if (req->drv_status == 0) {
+			csio_wr_issue(hw, req->eq_idx, false);
+			csio_set_state(&req->sm, csio_scsis_aborting);
+		}
+		break;
+
+	case CSIO_SCSIE_CLOSE:
+		csio_scsi_abrt_cls(req, SCSI_CLOSE);
+		if (req->drv_status == 0) {
+			csio_wr_issue(hw, req->eq_idx, false);
+			csio_set_state(&req->sm, csio_scsis_closing);
+		}
+		break;
+
+	case CSIO_SCSIE_DRVCLEANUP:
+		req->wr_status = FW_HOSTERROR;
+		CSIO_DEC_STATS(scm, n_active);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_tm_active(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+
+	switch (evt) {
+	case CSIO_SCSIE_COMPLETED:
+		CSIO_DEC_STATS(scm, n_tm_active);
+		list_del_init(&req->sm.sm_list);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+
+		break;
+
+	case CSIO_SCSIE_ABORT:
+		csio_scsi_abrt_cls(req, SCSI_ABORT);
+		if (req->drv_status == 0) {
+			csio_wr_issue(hw, req->eq_idx, false);
+			csio_set_state(&req->sm, csio_scsis_aborting);
+		}
+		break;
+
+
+	case CSIO_SCSIE_CLOSE:
+		csio_scsi_abrt_cls(req, SCSI_CLOSE);
+		if (req->drv_status == 0) {
+			csio_wr_issue(hw, req->eq_idx, false);
+			csio_set_state(&req->sm, csio_scsis_closing);
+		}
+		break;
+
+	case CSIO_SCSIE_DRVCLEANUP:
+		req->wr_status = FW_HOSTERROR;
+		CSIO_DEC_STATS(scm, n_tm_active);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_aborting(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+
+	switch (evt) {
+	case CSIO_SCSIE_COMPLETED:
+		csio_dbg(hw,
+			 "ioreq %p recvd cmpltd (wr_status:%d) "
+			 "in aborting st\n", req, req->wr_status);
+		/*
+		 * Use -ECANCELED to explicitly tell the ABORTED event that
+		 * the original I/O was returned to driver by FW.
+		 * We dont really care if the I/O was returned with success by
+		 * FW (because the ABORT and completion of the I/O crossed each
+		 * other), or any other return value. Once we are in aborting
+		 * state, the success or failure of the I/O is unimportant to
+		 * us.
+		 */
+		req->drv_status = -ECANCELED;
+		break;
+
+	case CSIO_SCSIE_ABORT:
+		CSIO_INC_STATS(scm, n_abrt_dups);
+		break;
+
+	case CSIO_SCSIE_ABORTED:
+
+		csio_dbg(hw, "abort of %p return status:0x%x drv_status:%x\n",
+			 req, req->wr_status, req->drv_status);
+		/*
+		 * Check if original I/O WR completed before the Abort
+		 * completion.
+		 */
+		if (req->drv_status != -ECANCELED) {
+			csio_warn(hw,
+				  "Abort completed before original I/O,"
+				   " req:%p\n", req);
+			CSIO_DB_ASSERT(0);
+		}
+
+		/*
+		 * There are the following possible scenarios:
+		 * 1. The abort completed successfully, FW returned FW_SUCCESS.
+		 * 2. The completion of an I/O and the receipt of
+		 *    abort for that I/O by the FW crossed each other.
+		 *    The FW returned FW_EINVAL. The original I/O would have
+		 *    returned with FW_SUCCESS or any other SCSI error.
+		 * 3. The FW couldnt sent the abort out on the wire, as there
+		 *    was an I-T nexus loss (link down, remote device logged
+		 *    out etc). FW sent back an appropriate IT nexus loss status
+		 *    for the abort.
+		 * 4. FW sent an abort, but abort timed out (remote device
+		 *    didnt respond). FW replied back with
+		 *    FW_SCSI_ABORT_TIMEDOUT.
+		 * 5. FW couldnt genuinely abort the request for some reason,
+		 *    and sent us an error.
+		 *
+		 * The first 3 scenarios are treated as  succesful abort
+		 * operations by the host, while the last 2 are failed attempts
+		 * to abort. Manipulate the return value of the request
+		 * appropriately, so that host can convey these results
+		 * back to the upper layer.
+		 */
+		if ((req->wr_status == FW_SUCCESS) ||
+		    (req->wr_status == FW_EINVAL) ||
+		    csio_scsi_itnexus_loss_error(req->wr_status))
+			req->wr_status = FW_SCSI_ABORT_REQUESTED;
+
+		CSIO_DEC_STATS(scm, n_active);
+		list_del_init(&req->sm.sm_list);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	case CSIO_SCSIE_DRVCLEANUP:
+		req->wr_status = FW_HOSTERROR;
+		CSIO_DEC_STATS(scm, n_active);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	case CSIO_SCSIE_CLOSE:
+		/*
+		 * We can receive this event from the module
+		 * cleanup paths, if the FW forgot to reply to the ABORT WR
+		 * and left this ioreq in this state. For now, just ignore
+		 * the event. The CLOSE event is sent to this state, as
+		 * the LINK may have already gone down.
+		 */
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_closing(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+
+	switch (evt) {
+	case CSIO_SCSIE_COMPLETED:
+		csio_dbg(hw,
+			 "ioreq %p recvd cmpltd (wr_status:%d) "
+			 "in closing st\n", req, req->wr_status);
+		/*
+		 * Use -ECANCELED to explicitly tell the CLOSED event that
+		 * the original I/O was returned to driver by FW.
+		 * We dont really care if the I/O was returned with success by
+		 * FW (because the CLOSE and completion of the I/O crossed each
+		 * other), or any other return value. Once we are in aborting
+		 * state, the success or failure of the I/O is unimportant to
+		 * us.
+		 */
+		req->drv_status = -ECANCELED;
+		break;
+
+	case CSIO_SCSIE_CLOSED:
+		/*
+		 * Check if original I/O WR completed before the Close
+		 * completion.
+		 */
+		if (req->drv_status != -ECANCELED) {
+			csio_fatal(hw,
+				   "Close completed before original I/O,"
+				   " req:%p\n", req);
+			CSIO_DB_ASSERT(0);
+		}
+
+		/*
+		 * Either close succeeded, or we issued close to FW at the
+		 * same time FW compelted it to us. Either way, the I/O
+		 * is closed.
+		 */
+		CSIO_DB_ASSERT((req->wr_status == FW_SUCCESS) ||
+					(req->wr_status == FW_EINVAL));
+		req->wr_status = FW_SCSI_CLOSE_REQUESTED;
+
+		CSIO_DEC_STATS(scm, n_active);
+		list_del_init(&req->sm.sm_list);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	case CSIO_SCSIE_CLOSE:
+		break;
+
+	case CSIO_SCSIE_DRVCLEANUP:
+		req->wr_status = FW_HOSTERROR;
+		CSIO_DEC_STATS(scm, n_active);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_shost_cmpl_await(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	switch (evt) {
+	case CSIO_SCSIE_ABORT:
+	case CSIO_SCSIE_CLOSE:
+		/*
+		 * Just succeed the abort request, and hope that
+		 * the remote device unregister path will cleanup
+		 * this I/O to the upper layer within a sane
+		 * amount of time.
+		 */
+		/*
+		 * A close can come in during a LINK DOWN. The FW would have
+		 * returned us the I/O back, but not the remote device lost
+		 * FW event. In this interval, if the I/O times out at the upper
+		 * layer, a close can come in. Take the same action as abort:
+		 * return success, and hope that the remote device unregister
+		 * path will cleanup this I/O. If the FW still doesnt send
+		 * the msg, the close times out, and the upper layer resorts
+		 * to the next level of error recovery.
+		 */
+		req->drv_status = 0;
+		break;
+	case CSIO_SCSIE_DRVCLEANUP:
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+	default:
+		csio_dbg(req->lnode->hwp, "Unhandled event:%d sent to req:%p\n",
+			 evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+/*
+ * csio_scsi_cmpl_handler - WR completion handler for SCSI.
+ * @hw: HW module.
+ * @wr: The completed WR from the ingress queue.
+ * @len: Length of the WR.
+ * @flb: Freelist buffer array.
+ * @priv: Private object
+ * @scsiwr: Pointer to SCSI WR.
+ *
+ * This is the WR completion handler called per completion from the
+ * ISR. It is called with lock held. It walks past the RSS and CPL message
+ * header where the actual WR is present.
+ * It then gets the status, WR handle (ioreq pointer) and the len of
+ * the WR, based on WR opcode. Only on a non-good status is the entire
+ * WR copied into the WR cache (ioreq->fw_wr).
+ * The ioreq corresponding to the WR is returned to the caller.
+ * NOTE: The SCSI queue doesnt allocate a freelist today, hence
+ * no freelist buffer is expected.
+ */
+struct csio_ioreq *
+csio_scsi_cmpl_handler(struct csio_hw *hw, void *wr, uint32_t len,
+		     struct csio_fl_dma_buf *flb, void *priv, uint8_t **scsiwr)
+{
+	struct csio_ioreq *ioreq = NULL;
+	struct cpl_fw6_msg *cpl;
+	uint8_t *tempwr;
+	uint8_t	status;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+
+	/* skip RSS header */
+	cpl = (struct cpl_fw6_msg *)((uintptr_t)wr + sizeof(__be64));
+
+	if (unlikely(cpl->opcode != CPL_FW6_MSG)) {
+		csio_warn(hw, "Error: Invalid CPL msg %x recvd on SCSI q\n",
+			  cpl->opcode);
+		CSIO_INC_STATS(scm, n_inval_cplop);
+		return NULL;
+	}
+
+	tempwr = (uint8_t *)(cpl->data);
+	status = csio_wr_status(tempwr);
+	*scsiwr = tempwr;
+
+	if (likely((*tempwr == FW_SCSI_READ_WR) ||
+			(*tempwr == FW_SCSI_WRITE_WR) ||
+			(*tempwr == FW_SCSI_CMD_WR))) {
+		ioreq = (struct csio_ioreq *)((uintptr_t)
+				 (((struct fw_scsi_read_wr *)tempwr)->cookie));
+		CSIO_DB_ASSERT(virt_addr_valid(ioreq));
+
+		ioreq->wr_status = status;
+
+		return ioreq;
+	}
+
+	if (*tempwr == FW_SCSI_ABRT_CLS_WR) {
+		ioreq = (struct csio_ioreq *)((uintptr_t)
+			 (((struct fw_scsi_abrt_cls_wr *)tempwr)->cookie));
+		CSIO_DB_ASSERT(virt_addr_valid(ioreq));
+
+		ioreq->wr_status = status;
+		return ioreq;
+	}
+
+	csio_warn(hw, "WR with invalid opcode in SCSI IQ: %x\n", *tempwr);
+	CSIO_INC_STATS(scm, n_inval_scsiop);
+	return NULL;
+}
+
+/*
+ * csio_scsi_cleanup_io_q - Cleanup the given queue.
+ * @scm: SCSI module.
+ * @q: Queue to be cleaned up.
+ *
+ * Called with lock held. Has to exit with lock held.
+ */
+void
+csio_scsi_cleanup_io_q(struct csio_scsim *scm, struct list_head *q)
+{
+	struct csio_hw *hw = scm->hw;
+	struct csio_ioreq *ioreq;
+	struct list_head *tmp, *next;
+	struct scsi_cmnd *scmnd;
+
+	/* Call back the completion routines of the active_q */
+	list_for_each_safe(tmp, next, q) {
+		ioreq = (struct csio_ioreq *)tmp;
+		csio_scsi_drvcleanup(ioreq);
+		list_del_init(&ioreq->sm.sm_list);
+		scmnd = csio_scsi_cmnd(ioreq);
+		spin_unlock_irq(&hw->lock);
+
+		/*
+		 * Upper layers may have cleared this command, hence this
+		 * check to avoid accessing stale references.
+		 */
+		if (scmnd != NULL)
+			ioreq->io_cbfn(hw, ioreq);
+
+		spin_lock_irq(&scm->freelist_lock);
+		csio_put_scsi_ioreq(scm, ioreq);
+		spin_unlock_irq(&scm->freelist_lock);
+
+		spin_lock_irq(&hw->lock);
+	}
+}
+
+#define CSIO_SCSI_ABORT_Q_POLL_MS		2000
+
+static void
+csio_abrt_cls(struct csio_ioreq *ioreq, struct scsi_cmnd *scmnd)
+{
+	struct csio_lnode *ln = ioreq->lnode;
+	struct csio_hw *hw = ln->hwp;
+	int ready = 0;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	int rv;
+
+	if (csio_scsi_cmnd(ioreq) != scmnd) {
+		CSIO_INC_STATS(scsim, n_abrt_race_comp);
+		return;
+	}
+
+	ready = csio_is_lnode_ready(ln);
+
+	rv = csio_do_abrt_cls(hw, ioreq, (ready ? SCSI_ABORT : SCSI_CLOSE));
+	if (rv != 0) {
+		if (ready)
+			CSIO_INC_STATS(scsim, n_abrt_busy_error);
+		else
+			CSIO_INC_STATS(scsim, n_cls_busy_error);
+	}
+}
+
+/*
+ * csio_scsi_abort_io_q - Abort all I/Os on given queue
+ * @scm: SCSI module.
+ * @q: Queue to abort.
+ * @tmo: Timeout in ms
+ *
+ * Attempt to abort all I/Os on given queue, and wait for a max
+ * of tmo milliseconds for them to complete. Returns success
+ * if all I/Os are aborted. Else returns -ETIMEDOUT.
+ * Should be entered with lock held. Exits with lock held.
+ * NOTE:
+ * Lock has to be held across the loop that aborts I/Os, since dropping the lock
+ * in between can cause the list to be corrupted. As a result, the caller
+ * of this function has to ensure that the number of I/os to be aborted
+ * is finite enough to not cause lock-held-for-too-long issues.
+ */
+static int
+csio_scsi_abort_io_q(struct csio_scsim *scm, struct list_head *q, uint32_t tmo)
+{
+	struct csio_hw *hw = scm->hw;
+	struct list_head *tmp, *next;
+	int count = DIV_ROUND_UP(tmo, CSIO_SCSI_ABORT_Q_POLL_MS);
+	struct scsi_cmnd *scmnd;
+
+	if (list_empty(q))
+		return 0;
+
+	csio_dbg(hw, "Aborting SCSI I/Os\n");
+
+	/* Now abort/close I/Os in the queue passed */
+	list_for_each_safe(tmp, next, q) {
+		scmnd = csio_scsi_cmnd((struct csio_ioreq *)tmp);
+		csio_abrt_cls((struct csio_ioreq *)tmp, scmnd);
+	}
+
+	/* Wait till all active I/Os are completed/aborted/closed */
+	while (!list_empty(q) && count--) {
+		spin_unlock_irq(&hw->lock);
+		msleep(CSIO_SCSI_ABORT_Q_POLL_MS);
+		spin_lock_irq(&hw->lock);
+	}
+
+	/* all aborts completed */
+	if (list_empty(q))
+		return 0;
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * csio_scsim_cleanup_io - Cleanup all I/Os in SCSI module.
+ * @scm: SCSI module.
+ * @abort: abort required.
+ * Called with lock held, should exit with lock held.
+ * Can sleep when waiting for I/Os to complete.
+ */
+int
+csio_scsim_cleanup_io(struct csio_scsim *scm, bool abort)
+{
+	struct csio_hw *hw = scm->hw;
+	int rv = 0;
+	int count = DIV_ROUND_UP(60 * 1000, CSIO_SCSI_ABORT_Q_POLL_MS);
+
+	/* No I/Os pending */
+	if (list_empty(&scm->active_q))
+		return 0;
+
+	/* Wait until all active I/Os are completed */
+	while (!list_empty(&scm->active_q) && count--) {
+		spin_unlock_irq(&hw->lock);
+		msleep(CSIO_SCSI_ABORT_Q_POLL_MS);
+		spin_lock_irq(&hw->lock);
+	}
+
+	/* all I/Os completed */
+	if (list_empty(&scm->active_q))
+		return 0;
+
+	/* Else abort */
+	if (abort) {
+		rv = csio_scsi_abort_io_q(scm, &scm->active_q, 30000);
+		if (rv == 0)
+			return rv;
+		csio_dbg(hw, "Some I/O aborts timed out, cleaning up..\n");
+	}
+
+	csio_scsi_cleanup_io_q(scm, &scm->active_q);
+
+	CSIO_DB_ASSERT(list_empty(&scm->active_q));
+
+	return rv;
+}
+
+/*
+ * csio_scsim_cleanup_io_lnode - Cleanup all I/Os of given lnode.
+ * @scm: SCSI module.
+ * @lnode: lnode
+ *
+ * Called with lock held, should exit with lock held.
+ * Can sleep (with dropped lock) when waiting for I/Os to complete.
+ */
+int
+csio_scsim_cleanup_io_lnode(struct csio_scsim *scm, struct csio_lnode *ln)
+{
+	struct csio_hw *hw = scm->hw;
+	struct csio_scsi_level_data sld;
+	int rv;
+	int count = DIV_ROUND_UP(60 * 1000, CSIO_SCSI_ABORT_Q_POLL_MS);
+
+	csio_dbg(hw, "Gathering all SCSI I/Os on lnode %p\n", ln);
+
+	sld.level = CSIO_LEV_LNODE;
+	sld.lnode = ln;
+	INIT_LIST_HEAD(&ln->cmpl_q);
+	csio_scsi_gather_active_ios(scm, &sld, &ln->cmpl_q);
+
+	/* No I/Os pending on this lnode  */
+	if (list_empty(&ln->cmpl_q))
+		return 0;
+
+	/* Wait until all active I/Os on this lnode are completed */
+	while (!list_empty(&ln->cmpl_q) && count--) {
+		spin_unlock_irq(&hw->lock);
+		msleep(CSIO_SCSI_ABORT_Q_POLL_MS);
+		spin_lock_irq(&hw->lock);
+	}
+
+	/* all I/Os completed */
+	if (list_empty(&ln->cmpl_q))
+		return 0;
+
+	csio_dbg(hw, "Some I/Os pending on ln:%p, aborting them..\n", ln);
+
+	/* I/Os are pending, abort them */
+	rv = csio_scsi_abort_io_q(scm, &ln->cmpl_q, 30000);
+	if (rv != 0) {
+		csio_dbg(hw, "Some I/O aborts timed out, cleaning up..\n");
+		csio_scsi_cleanup_io_q(scm, &ln->cmpl_q);
+	}
+
+	CSIO_DB_ASSERT(list_empty(&ln->cmpl_q));
+
+	return rv;
+}
+
+static ssize_t
+csio_show_hw_state(struct device *dev,
+		   struct device_attribute *attr, char *buf)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	if (csio_is_hw_ready(hw))
+		return snprintf(buf, PAGE_SIZE, "ready\n");
+	else
+		return snprintf(buf, PAGE_SIZE, "not ready\n");
+}
+
+/* Device reset */
+static ssize_t
+csio_device_reset(struct device *dev,
+		   struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	if (*buf != '1')
+		return -EINVAL;
+
+	/* Delete NPIV lnodes */
+	 csio_lnodes_exit(hw, 1);
+
+	/* Block upper IOs */
+	csio_lnodes_block_request(hw);
+
+	spin_lock_irq(&hw->lock);
+	csio_hw_reset(hw);
+	spin_unlock_irq(&hw->lock);
+
+	/* Unblock upper IOs */
+	csio_lnodes_unblock_request(hw);
+	return count;
+}
+
+/* disable port */
+static ssize_t
+csio_disable_port(struct device *dev,
+		   struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	bool disable;
+
+	if (*buf == '1' || *buf == '0')
+		disable = (*buf == '1') ? true : false;
+	else
+		return -EINVAL;
+
+	/* Block upper IOs */
+	csio_lnodes_block_by_port(hw, ln->portid);
+
+	spin_lock_irq(&hw->lock);
+	csio_disable_lnodes(hw, ln->portid, disable);
+	spin_unlock_irq(&hw->lock);
+
+	/* Unblock upper IOs */
+	csio_lnodes_unblock_by_port(hw, ln->portid);
+	return count;
+}
+
+/* Show debug level */
+static ssize_t
+csio_show_dbg_level(struct device *dev,
+		   struct device_attribute *attr, char *buf)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%x\n", ln->params.log_level);
+}
+
+/* Store debug level */
+static ssize_t
+csio_store_dbg_level(struct device *dev,
+		   struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	uint32_t dbg_level = 0;
+
+	if (!isdigit(buf[0]))
+		return -EINVAL;
+
+	if (sscanf(buf, "%i", &dbg_level))
+		return -EINVAL;
+
+	ln->params.log_level = dbg_level;
+	hw->params.log_level = dbg_level;
+
+	return 0;
+}
+
+static DEVICE_ATTR(hw_state, S_IRUGO, csio_show_hw_state, NULL);
+static DEVICE_ATTR(device_reset, S_IRUGO | S_IWUSR, NULL, csio_device_reset);
+static DEVICE_ATTR(disable_port, S_IRUGO | S_IWUSR, NULL, csio_disable_port);
+static DEVICE_ATTR(dbg_level, S_IRUGO | S_IWUSR, csio_show_dbg_level,
+		  csio_store_dbg_level);
+
+static struct device_attribute *csio_fcoe_lport_attrs[] = {
+	&dev_attr_hw_state,
+	&dev_attr_device_reset,
+	&dev_attr_disable_port,
+	&dev_attr_dbg_level,
+	NULL,
+};
+
+static ssize_t
+csio_show_num_reg_rnodes(struct device *dev,
+		     struct device_attribute *attr, char *buf)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", ln->num_reg_rnodes);
+}
+
+static DEVICE_ATTR(num_reg_rnodes, S_IRUGO, csio_show_num_reg_rnodes, NULL);
+
+static struct device_attribute *csio_fcoe_vport_attrs[] = {
+	&dev_attr_num_reg_rnodes,
+	&dev_attr_dbg_level,
+	NULL,
+};
+
+static inline uint32_t
+csio_scsi_copy_to_sgl(struct csio_hw *hw, struct csio_ioreq *req)
+{
+	struct scsi_cmnd *scmnd  = (struct scsi_cmnd *)csio_scsi_cmnd(req);
+	struct scatterlist *sg;
+	uint32_t bytes_left;
+	uint32_t bytes_copy;
+	uint32_t buf_off = 0;
+	uint32_t start_off = 0;
+	uint32_t sg_off = 0;
+	void *sg_addr;
+	void *buf_addr;
+	struct csio_dma_buf *dma_buf;
+
+	bytes_left = scsi_bufflen(scmnd);
+	sg = scsi_sglist(scmnd);
+	dma_buf = (struct csio_dma_buf *)csio_list_next(&req->gen_list);
+
+	/* Copy data from driver buffer to SGs of SCSI CMD */
+	while (bytes_left > 0 && sg && dma_buf) {
+		if (buf_off >= dma_buf->len) {
+			buf_off = 0;
+			dma_buf = (struct csio_dma_buf *)
+					csio_list_next(dma_buf);
+			continue;
+		}
+
+		if (start_off >= sg->length) {
+			start_off -= sg->length;
+			sg = sg_next(sg);
+			continue;
+		}
+
+		buf_addr = dma_buf->vaddr + buf_off;
+		sg_off = sg->offset + start_off;
+		bytes_copy = min((dma_buf->len - buf_off),
+				sg->length - start_off);
+		bytes_copy = min((uint32_t)(PAGE_SIZE - (sg_off & ~PAGE_MASK)),
+				 bytes_copy);
+
+		sg_addr = kmap_atomic(sg_page(sg) + (sg_off >> PAGE_SHIFT));
+		if (!sg_addr) {
+			csio_err(hw, "failed to kmap sg:%p of ioreq:%p\n",
+				sg, req);
+			break;
+		}
+
+		csio_dbg(hw, "copy_to_sgl:sg_addr %p sg_off %d buf %p len %d\n",
+				sg_addr, sg_off, buf_addr, bytes_copy);
+		memcpy(sg_addr + (sg_off & ~PAGE_MASK), buf_addr, bytes_copy);
+		kunmap_atomic(sg_addr);
+
+		start_off +=  bytes_copy;
+		buf_off += bytes_copy;
+		bytes_left -= bytes_copy;
+	}
+
+	if (bytes_left > 0)
+		return DID_ERROR;
+	else
+		return DID_OK;
+}
+
+/*
+ * csio_scsi_err_handler - SCSI error handler.
+ * @hw: HW module.
+ * @req: IO request.
+ *
+ */
+static inline void
+csio_scsi_err_handler(struct csio_hw *hw, struct csio_ioreq *req)
+{
+	struct scsi_cmnd *cmnd  = (struct scsi_cmnd *)csio_scsi_cmnd(req);
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+	struct fcp_resp_with_ext *fcp_resp;
+	struct fcp_resp_rsp_info *rsp_info;
+	struct csio_dma_buf *dma_buf;
+	uint8_t flags, scsi_status = 0;
+	uint32_t host_status = DID_OK;
+	uint32_t rsp_len = 0, sns_len = 0;
+	struct csio_rnode *rn = (struct csio_rnode *)(cmnd->device->hostdata);
+
+
+	switch (req->wr_status) {
+	case FW_HOSTERROR:
+		if (unlikely(!csio_is_hw_ready(hw)))
+			return;
+
+		host_status = DID_ERROR;
+		CSIO_INC_STATS(scm, n_hosterror);
+
+		break;
+	case FW_SCSI_RSP_ERR:
+		dma_buf = &req->dma_buf;
+		fcp_resp = (struct fcp_resp_with_ext *)dma_buf->vaddr;
+		rsp_info = (struct fcp_resp_rsp_info *)(fcp_resp + 1);
+		flags = fcp_resp->resp.fr_flags;
+		scsi_status = fcp_resp->resp.fr_status;
+
+		if (flags & FCP_RSP_LEN_VAL) {
+			rsp_len = be32_to_cpu(fcp_resp->ext.fr_rsp_len);
+			if ((rsp_len != 0 && rsp_len != 4 && rsp_len != 8) ||
+				(rsp_info->rsp_code != FCP_TMF_CMPL)) {
+				host_status = DID_ERROR;
+				goto out;
+			}
+		}
+
+		if ((flags & FCP_SNS_LEN_VAL) && fcp_resp->ext.fr_sns_len) {
+			sns_len = be32_to_cpu(fcp_resp->ext.fr_sns_len);
+			if (sns_len > SCSI_SENSE_BUFFERSIZE)
+				sns_len = SCSI_SENSE_BUFFERSIZE;
+
+			memcpy(cmnd->sense_buffer,
+			       &rsp_info->_fr_resvd[0] + rsp_len, sns_len);
+			CSIO_INC_STATS(scm, n_autosense);
+		}
+
+		scsi_set_resid(cmnd, 0);
+
+		/* Under run */
+		if (flags & FCP_RESID_UNDER) {
+			scsi_set_resid(cmnd,
+				       be32_to_cpu(fcp_resp->ext.fr_resid));
+
+			if (!(flags & FCP_SNS_LEN_VAL) &&
+			    (scsi_status == SAM_STAT_GOOD) &&
+			    ((scsi_bufflen(cmnd) - scsi_get_resid(cmnd))
+							< cmnd->underflow))
+				host_status = DID_ERROR;
+		} else if (flags & FCP_RESID_OVER)
+			host_status = DID_ERROR;
+
+		CSIO_INC_STATS(scm, n_rsperror);
+		break;
+
+	case FW_SCSI_OVER_FLOW_ERR:
+		csio_warn(hw,
+			  "Over-flow error,cmnd:0x%x expected len:0x%x"
+			  " resid:0x%x\n", cmnd->cmnd[0],
+			  scsi_bufflen(cmnd), scsi_get_resid(cmnd));
+		host_status = DID_ERROR;
+		CSIO_INC_STATS(scm, n_ovflerror);
+		break;
+
+	case FW_SCSI_UNDER_FLOW_ERR:
+		csio_warn(hw,
+			  "Under-flow error,cmnd:0x%x expected"
+			  " len:0x%x resid:0x%x lun:0x%x ssn:0x%x\n",
+			  cmnd->cmnd[0], scsi_bufflen(cmnd),
+			  scsi_get_resid(cmnd), cmnd->device->lun,
+			  rn->flowid);
+		host_status = DID_ERROR;
+		CSIO_INC_STATS(scm, n_unflerror);
+		break;
+
+	case FW_SCSI_ABORT_REQUESTED:
+	case FW_SCSI_ABORTED:
+	case FW_SCSI_CLOSE_REQUESTED:
+		csio_dbg(hw, "Req %p cmd:%p op:%x %s\n", req, cmnd,
+			     cmnd->cmnd[0],
+			    (req->wr_status == FW_SCSI_CLOSE_REQUESTED) ?
+			    "closed" : "aborted");
+		/*
+		 * csio_eh_abort_handler checks this value to
+		 * succeed or fail the abort request.
+		 */
+		host_status = DID_REQUEUE;
+		if (req->wr_status == FW_SCSI_CLOSE_REQUESTED)
+			CSIO_INC_STATS(scm, n_closed);
+		else
+			CSIO_INC_STATS(scm, n_aborted);
+		break;
+
+	case FW_SCSI_ABORT_TIMEDOUT:
+		/* FW timed out the abort itself */
+		csio_dbg(hw, "FW timed out abort req:%p cmnd:%p status:%x\n",
+			 req, cmnd, req->wr_status);
+		host_status = DID_ERROR;
+		CSIO_INC_STATS(scm, n_abrt_timedout);
+		break;
+
+	case FW_RDEV_NOT_READY:
+		/*
+		 * In firmware, a RDEV can get into this state
+		 * temporarily, before moving into dissapeared/lost
+		 * state. So, the driver should complete the request equivalent
+		 * to device-disappeared!
+		 */
+		CSIO_INC_STATS(scm, n_rdev_nr_error);
+		host_status = DID_ERROR;
+		break;
+
+	case FW_ERR_RDEV_LOST:
+		CSIO_INC_STATS(scm, n_rdev_lost_error);
+		host_status = DID_ERROR;
+		break;
+
+	case FW_ERR_RDEV_LOGO:
+		CSIO_INC_STATS(scm, n_rdev_logo_error);
+		host_status = DID_ERROR;
+		break;
+
+	case FW_ERR_RDEV_IMPL_LOGO:
+		host_status = DID_ERROR;
+		break;
+
+	case FW_ERR_LINK_DOWN:
+		CSIO_INC_STATS(scm, n_link_down_error);
+		host_status = DID_ERROR;
+		break;
+
+	case FW_FCOE_NO_XCHG:
+		CSIO_INC_STATS(scm, n_no_xchg_error);
+		host_status = DID_ERROR;
+		break;
+
+	default:
+		csio_err(hw, "Unknown SCSI FW WR status:%d req:%p cmnd:%p\n",
+			    req->wr_status, req, cmnd);
+		CSIO_DB_ASSERT(0);
+
+		CSIO_INC_STATS(scm, n_unknown_error);
+		host_status = DID_ERROR;
+		break;
+	}
+
+out:
+	if (req->nsge > 0)
+		scsi_dma_unmap(cmnd);
+
+	cmnd->result = (((host_status) << 16) | scsi_status);
+	cmnd->scsi_done(cmnd);
+
+	/* Wake up waiting threads */
+	csio_scsi_cmnd(req) = NULL;
+	complete_all(&req->cmplobj);
+}
+
+/*
+ * csio_scsi_cbfn - SCSI callback function.
+ * @hw: HW module.
+ * @req: IO request.
+ *
+ */
+static void
+csio_scsi_cbfn(struct csio_hw *hw, struct csio_ioreq *req)
+{
+	struct scsi_cmnd *cmnd  = (struct scsi_cmnd *)csio_scsi_cmnd(req);
+	uint8_t scsi_status = SAM_STAT_GOOD;
+	uint32_t host_status = DID_OK;
+
+	if (likely(req->wr_status == FW_SUCCESS)) {
+		if (req->nsge > 0) {
+			scsi_dma_unmap(cmnd);
+			if (req->dcopy)
+				host_status = csio_scsi_copy_to_sgl(hw, req);
+		}
+
+		cmnd->result = (((host_status) << 16) | scsi_status);
+		cmnd->scsi_done(cmnd);
+		csio_scsi_cmnd(req) = NULL;
+		CSIO_INC_STATS(csio_hw_to_scsim(hw), n_tot_success);
+	} else {
+		/* Error handling */
+		csio_scsi_err_handler(hw, req);
+	}
+}
+
+/**
+ * csio_queuecommand - Entry point to kickstart an I/O request.
+ * @host:	The scsi_host pointer.
+ * @cmnd:	The I/O request from ML.
+ *
+ * This routine does the following:
+ *	- Checks for HW and Rnode module readiness.
+ *	- Gets a free ioreq structure (which is already initialized
+ *	  to uninit during its allocation).
+ *	- Maps SG elements.
+ *	- Initializes ioreq members.
+ *	- Kicks off the SCSI state machine for this IO.
+ *	- Returns busy status on error.
+ */
+static int
+csio_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmnd)
+{
+	struct csio_lnode *ln = shost_priv(host);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	struct csio_rnode *rn = (struct csio_rnode *)(cmnd->device->hostdata);
+	struct csio_ioreq *ioreq = NULL;
+	unsigned long flags;
+	int nsge = 0;
+	int rv = SCSI_MLQUEUE_HOST_BUSY, nr;
+	int retval;
+	int cpu;
+	struct csio_scsi_qset *sqset;
+	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
+
+	if (!blk_rq_cpu_valid(cmnd->request))
+		cpu = smp_processor_id();
+	else
+		cpu = cmnd->request->cpu;
+
+	sqset = &hw->sqset[ln->portid][cpu];
+
+	nr = fc_remote_port_chkready(rport);
+	if (nr) {
+		cmnd->result = nr;
+		CSIO_INC_STATS(scsim, n_rn_nr_error);
+		goto err_done;
+	}
+
+	if (unlikely(!csio_is_hw_ready(hw))) {
+		cmnd->result = (DID_REQUEUE << 16);
+		CSIO_INC_STATS(scsim, n_hw_nr_error);
+		goto err_done;
+	}
+
+	/* Get req->nsge, if there are SG elements to be mapped  */
+	nsge = scsi_dma_map(cmnd);
+	if (unlikely(nsge < 0)) {
+		CSIO_INC_STATS(scsim, n_dmamap_error);
+		goto err;
+	}
+
+	/* Do we support so many mappings? */
+	if (unlikely(nsge > scsim->max_sge)) {
+		csio_warn(hw,
+			  "More SGEs than can be supported."
+			  " SGEs: %d, Max SGEs: %d\n", nsge, scsim->max_sge);
+		CSIO_INC_STATS(scsim, n_unsupp_sge_error);
+		goto err_dma_unmap;
+	}
+
+	/* Get a free ioreq structure - SM is already set to uninit */
+	ioreq = csio_get_scsi_ioreq_lock(hw, scsim);
+	if (!ioreq) {
+		csio_err(hw, "Out of I/O request elements. Active #:%d\n",
+			 scsim->stats.n_active);
+		CSIO_INC_STATS(scsim, n_no_req_error);
+		goto err_dma_unmap;
+	}
+
+	ioreq->nsge		= nsge;
+	ioreq->lnode		= ln;
+	ioreq->rnode		= rn;
+	ioreq->iq_idx		= sqset->iq_idx;
+	ioreq->eq_idx		= sqset->eq_idx;
+	ioreq->wr_status	= 0;
+	ioreq->drv_status	= 0;
+	csio_scsi_cmnd(ioreq)	= (void *)cmnd;
+	ioreq->tmo		= 0;
+	ioreq->datadir		= cmnd->sc_data_direction;
+
+	if (cmnd->sc_data_direction == DMA_TO_DEVICE) {
+		CSIO_INC_STATS(ln, n_output_requests);
+		ln->stats.n_output_bytes += scsi_bufflen(cmnd);
+	} else if (cmnd->sc_data_direction == DMA_FROM_DEVICE) {
+		CSIO_INC_STATS(ln, n_input_requests);
+		ln->stats.n_input_bytes += scsi_bufflen(cmnd);
+	} else
+		CSIO_INC_STATS(ln, n_control_requests);
+
+	/* Set cbfn */
+	ioreq->io_cbfn = csio_scsi_cbfn;
+
+	/* Needed during abort */
+	cmnd->host_scribble = (unsigned char *)ioreq;
+	cmnd->SCp.Message = 0;
+
+	/* Kick off SCSI IO SM on the ioreq */
+	spin_lock_irqsave(&hw->lock, flags);
+	retval = csio_scsi_start_io(ioreq);
+	spin_unlock_irqrestore(&hw->lock, flags);
+
+	if (retval != 0) {
+		csio_err(hw, "ioreq: %p couldnt be started, status:%d\n",
+			 ioreq, retval);
+		CSIO_INC_STATS(scsim, n_busy_error);
+		goto err_put_req;
+	}
+
+	return 0;
+
+err_put_req:
+	csio_put_scsi_ioreq_lock(hw, scsim, ioreq);
+err_dma_unmap:
+	if (nsge > 0)
+		scsi_dma_unmap(cmnd);
+err:
+	return rv;
+
+err_done:
+	cmnd->scsi_done(cmnd);
+	return 0;
+}
+
+static int
+csio_do_abrt_cls(struct csio_hw *hw, struct csio_ioreq *ioreq, bool abort)
+{
+	int rv;
+	int cpu = smp_processor_id();
+	struct csio_lnode *ln = ioreq->lnode;
+	struct csio_scsi_qset *sqset = &hw->sqset[ln->portid][cpu];
+
+	ioreq->tmo = CSIO_SCSI_ABRT_TMO_MS;
+	/*
+	 * Use current processor queue for posting the abort/close, but retain
+	 * the ingress queue ID of the original I/O being aborted/closed - we
+	 * need the abort/close completion to be received on the same queue
+	 * as the original I/O.
+	 */
+	ioreq->eq_idx = sqset->eq_idx;
+
+	if (abort == SCSI_ABORT)
+		rv = csio_scsi_abort(ioreq);
+	else
+		rv = csio_scsi_close(ioreq);
+
+	return rv;
+}
+
+static int
+csio_eh_abort_handler(struct scsi_cmnd *cmnd)
+{
+	struct csio_ioreq *ioreq;
+	struct csio_lnode *ln = shost_priv(cmnd->device->host);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	int ready = 0, ret;
+	unsigned long tmo = 0;
+	int rv;
+	struct csio_rnode *rn = (struct csio_rnode *)(cmnd->device->hostdata);
+
+	ret = fc_block_scsi_eh(cmnd);
+	if (ret)
+		return ret;
+
+	ioreq = (struct csio_ioreq *)cmnd->host_scribble;
+	if (!ioreq)
+		return SUCCESS;
+
+	if (!rn)
+		return FAILED;
+
+	csio_dbg(hw,
+		 "Request to abort ioreq:%p cmd:%p cdb:%08llx"
+		 " ssni:0x%x lun:%d iq:0x%x\n",
+		ioreq, cmnd, *((uint64_t *)cmnd->cmnd), rn->flowid,
+		cmnd->device->lun, csio_q_physiqid(hw, ioreq->iq_idx));
+
+	if (((struct scsi_cmnd *)csio_scsi_cmnd(ioreq)) != cmnd) {
+		CSIO_INC_STATS(scsim, n_abrt_race_comp);
+		return SUCCESS;
+	}
+
+	ready = csio_is_lnode_ready(ln);
+	tmo = CSIO_SCSI_ABRT_TMO_MS;
+
+	spin_lock_irq(&hw->lock);
+	rv = csio_do_abrt_cls(hw, ioreq, (ready ? SCSI_ABORT : SCSI_CLOSE));
+	spin_unlock_irq(&hw->lock);
+
+	if (rv != 0) {
+		if (rv == -EINVAL) {
+			/* Return success, if abort/close request issued on
+			 * already completed IO
+			 */
+			return SUCCESS;
+		}
+		if (ready)
+			CSIO_INC_STATS(scsim, n_abrt_busy_error);
+		else
+			CSIO_INC_STATS(scsim, n_cls_busy_error);
+
+		goto inval_scmnd;
+	}
+
+	/* Wait for completion */
+	init_completion(&ioreq->cmplobj);
+	wait_for_completion_timeout(&ioreq->cmplobj, msecs_to_jiffies(tmo));
+
+	/* FW didnt respond to abort within our timeout */
+	if (((struct scsi_cmnd *)csio_scsi_cmnd(ioreq)) == cmnd) {
+
+		csio_err(hw, "Abort timed out -- req: %p\n", ioreq);
+		CSIO_INC_STATS(scsim, n_abrt_timedout);
+
+inval_scmnd:
+		if (ioreq->nsge > 0)
+			scsi_dma_unmap(cmnd);
+
+		spin_lock_irq(&hw->lock);
+		csio_scsi_cmnd(ioreq) = NULL;
+		spin_unlock_irq(&hw->lock);
+
+		cmnd->result = (DID_ERROR << 16);
+		cmnd->scsi_done(cmnd);
+
+		return FAILED;
+	}
+
+	/* FW successfully aborted the request */
+	if (host_byte(cmnd->result) == DID_REQUEUE) {
+		csio_info(hw,
+			"Aborted SCSI command to (%d:%d) serial#:0x%lx\n",
+			cmnd->device->id, cmnd->device->lun,
+			cmnd->serial_number);
+		return SUCCESS;
+	} else {
+		csio_info(hw,
+			"Failed to abort SCSI command, (%d:%d) serial#:0x%lx\n",
+			cmnd->device->id, cmnd->device->lun,
+			cmnd->serial_number);
+		return FAILED;
+	}
+}
+
+/*
+ * csio_tm_cbfn - TM callback function.
+ * @hw: HW module.
+ * @req: IO request.
+ *
+ * Cache the result in 'cmnd', since ioreq will be freed soon
+ * after we return from here, and the waiting thread shouldnt trust
+ * the ioreq contents.
+ */
+static void
+csio_tm_cbfn(struct csio_hw *hw, struct csio_ioreq *req)
+{
+	struct scsi_cmnd *cmnd  = (struct scsi_cmnd *)csio_scsi_cmnd(req);
+	struct csio_dma_buf *dma_buf;
+	uint8_t flags = 0;
+	struct fcp_resp_with_ext *fcp_resp;
+	struct fcp_resp_rsp_info *rsp_info;
+
+	csio_dbg(hw, "req: %p in csio_tm_cbfn status: %d\n",
+		      req, req->wr_status);
+
+	/* Cache FW return status */
+	cmnd->SCp.Status = req->wr_status;
+
+	/* Special handling based on FCP response */
+
+	/*
+	 * FW returns us this error, if flags were set. FCP4 says
+	 * FCP_RSP_LEN_VAL in flags shall be set for TM completions.
+	 * So if a target were to set this bit, we expect that the
+	 * rsp_code is set to FCP_TMF_CMPL for a successful TM
+	 * completion. Any other rsp_code means TM operation failed.
+	 * If a target were to just ignore setting flags, we treat
+	 * the TM operation as success, and FW returns FW_SUCCESS.
+	 */
+	if (req->wr_status == FW_SCSI_RSP_ERR) {
+		dma_buf = &req->dma_buf;
+		fcp_resp = (struct fcp_resp_with_ext *)dma_buf->vaddr;
+		rsp_info = (struct fcp_resp_rsp_info *)(fcp_resp + 1);
+
+		flags = fcp_resp->resp.fr_flags;
+
+		/* Modify return status if flags indicate success */
+		if (flags & FCP_RSP_LEN_VAL)
+			if (rsp_info->rsp_code == FCP_TMF_CMPL)
+				cmnd->SCp.Status = FW_SUCCESS;
+
+		csio_dbg(hw, "TM FCP rsp code: %d\n", rsp_info->rsp_code);
+	}
+
+	/* Wake up the TM handler thread */
+	csio_scsi_cmnd(req) = NULL;
+}
+
+static int
+csio_eh_lun_reset_handler(struct scsi_cmnd *cmnd)
+{
+	struct csio_lnode *ln = shost_priv(cmnd->device->host);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	struct csio_rnode *rn = (struct csio_rnode *)(cmnd->device->hostdata);
+	struct csio_ioreq *ioreq = NULL;
+	struct csio_scsi_qset *sqset;
+	unsigned long flags;
+	int retval;
+	int count, ret;
+	LIST_HEAD(local_q);
+	struct csio_scsi_level_data sld;
+
+	if (!rn)
+		goto fail;
+
+	csio_dbg(hw, "Request to reset LUN:%d (ssni:0x%x tgtid:%d)\n",
+		      cmnd->device->lun, rn->flowid, rn->scsi_id);
+
+	if (!csio_is_lnode_ready(ln)) {
+		csio_err(hw,
+			 "LUN reset cannot be issued on non-ready"
+			 " local node vnpi:0x%x (LUN:%d)\n",
+			 ln->vnp_flowid, cmnd->device->lun);
+		goto fail;
+	}
+
+	/* Lnode is ready, now wait on rport node readiness */
+	ret = fc_block_scsi_eh(cmnd);
+	if (ret)
+		return ret;
+
+	/*
+	 * If we have blocked in the previous call, at this point, either the
+	 * remote node has come back online, or device loss timer has fired
+	 * and the remote node is destroyed. Allow the LUN reset only for
+	 * the former case, since LUN reset is a TMF I/O on the wire, and we
+	 * need a valid session to issue it.
+	 */
+	if (fc_remote_port_chkready(rn->rport)) {
+		csio_err(hw,
+			 "LUN reset cannot be issued on non-ready"
+			 " remote node ssni:0x%x (LUN:%d)\n",
+			 rn->flowid, cmnd->device->lun);
+		goto fail;
+	}
+
+	/* Get a free ioreq structure - SM is already set to uninit */
+	ioreq = csio_get_scsi_ioreq_lock(hw, scsim);
+
+	if (!ioreq) {
+		csio_err(hw, "Out of IO request elements. Active # :%d\n",
+			 scsim->stats.n_active);
+		goto fail;
+	}
+
+	sqset			= &hw->sqset[ln->portid][smp_processor_id()];
+	ioreq->nsge		= 0;
+	ioreq->lnode		= ln;
+	ioreq->rnode		= rn;
+	ioreq->iq_idx		= sqset->iq_idx;
+	ioreq->eq_idx		= sqset->eq_idx;
+
+	csio_scsi_cmnd(ioreq)	= cmnd;
+	cmnd->host_scribble	= (unsigned char *)ioreq;
+	cmnd->SCp.Status	= 0;
+
+	cmnd->SCp.Message	= FCP_TMF_LUN_RESET;
+	ioreq->tmo		= CSIO_SCSI_LUNRST_TMO_MS / 1000;
+
+	/*
+	 * FW times the LUN reset for ioreq->tmo, so we got to wait a little
+	 * longer (10s for now) than that to allow FW to return the timed
+	 * out command.
+	 */
+	count = DIV_ROUND_UP((ioreq->tmo + 10) * 1000, CSIO_SCSI_TM_POLL_MS);
+
+	/* Set cbfn */
+	ioreq->io_cbfn = csio_tm_cbfn;
+
+	/* Save of the ioreq info for later use */
+	sld.level = CSIO_LEV_LUN;
+	sld.lnode = ioreq->lnode;
+	sld.rnode = ioreq->rnode;
+	sld.oslun = (uint64_t)cmnd->device->lun;
+
+	spin_lock_irqsave(&hw->lock, flags);
+	/* Kick off TM SM on the ioreq */
+	retval = csio_scsi_start_tm(ioreq);
+	spin_unlock_irqrestore(&hw->lock, flags);
+
+	if (retval != 0) {
+		csio_err(hw, "Failed to issue LUN reset, req:%p, status:%d\n",
+			    ioreq, retval);
+		goto fail_ret_ioreq;
+	}
+
+	csio_dbg(hw, "Waiting max %d secs for LUN reset completion\n",
+		    count * (CSIO_SCSI_TM_POLL_MS / 1000));
+	/* Wait for completion */
+	while ((((struct scsi_cmnd *)csio_scsi_cmnd(ioreq)) == cmnd)
+								&& count--)
+		msleep(CSIO_SCSI_TM_POLL_MS);
+
+	/* LUN reset timed-out */
+	if (((struct scsi_cmnd *)csio_scsi_cmnd(ioreq)) == cmnd) {
+		csio_err(hw, "LUN reset (%d:%d) timed out\n",
+			 cmnd->device->id, cmnd->device->lun);
+
+		spin_lock_irq(&hw->lock);
+		csio_scsi_drvcleanup(ioreq);
+		list_del_init(&ioreq->sm.sm_list);
+		spin_unlock_irq(&hw->lock);
+
+		goto fail_ret_ioreq;
+	}
+
+	/* LUN reset returned, check cached status */
+	if (cmnd->SCp.Status != FW_SUCCESS) {
+		csio_err(hw, "LUN reset failed (%d:%d), status: %d\n",
+			 cmnd->device->id, cmnd->device->lun, cmnd->SCp.Status);
+		goto fail;
+	}
+
+	/* LUN reset succeeded, Start aborting affected I/Os */
+	/*
+	 * Since the host guarantees during LUN reset that there
+	 * will not be any more I/Os to that LUN, until the LUN reset
+	 * completes, we gather pending I/Os after the LUN reset.
+	 */
+	spin_lock_irq(&hw->lock);
+	csio_scsi_gather_active_ios(scsim, &sld, &local_q);
+
+	retval = csio_scsi_abort_io_q(scsim, &local_q, 30000);
+	spin_unlock_irq(&hw->lock);
+
+	/* Aborts may have timed out */
+	if (retval != 0) {
+		csio_err(hw,
+			 "Attempt to abort I/Os during LUN reset of %d"
+			 " returned %d\n", cmnd->device->lun, retval);
+		/* Return I/Os back to active_q */
+		spin_lock_irq(&hw->lock);
+		list_splice_tail_init(&local_q, &scsim->active_q);
+		spin_unlock_irq(&hw->lock);
+		goto fail;
+	}
+
+	CSIO_INC_STATS(rn, n_lun_rst);
+
+	csio_info(hw, "LUN reset occurred (%d:%d)\n",
+		  cmnd->device->id, cmnd->device->lun);
+
+	return SUCCESS;
+
+fail_ret_ioreq:
+	csio_put_scsi_ioreq_lock(hw, scsim, ioreq);
+fail:
+	CSIO_INC_STATS(rn, n_lun_rst_fail);
+	return FAILED;
+}
+
+static int
+csio_slave_alloc(struct scsi_device *sdev)
+{
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+
+	if (!rport || fc_remote_port_chkready(rport))
+		return -ENXIO;
+
+	sdev->hostdata = *((struct csio_lnode **)(rport->dd_data));
+
+	return 0;
+}
+
+static int
+csio_slave_configure(struct scsi_device *sdev)
+{
+	if (sdev->tagged_supported)
+		scsi_activate_tcq(sdev, csio_lun_qdepth);
+	else
+		scsi_deactivate_tcq(sdev, csio_lun_qdepth);
+
+	return 0;
+}
+
+static void
+csio_slave_destroy(struct scsi_device *sdev)
+{
+	sdev->hostdata = NULL;
+}
+
+static int
+csio_scan_finished(struct Scsi_Host *shost, unsigned long time)
+{
+	struct csio_lnode *ln = shost_priv(shost);
+	int rv = 1;
+
+	spin_lock_irq(shost->host_lock);
+	if (!ln->hwp || csio_list_deleted(&ln->sm.sm_list))
+		goto out;
+
+	rv = csio_scan_done(ln, jiffies, time, csio_max_scan_tmo * HZ,
+			    csio_delta_scan_tmo * HZ);
+out:
+	spin_unlock_irq(shost->host_lock);
+
+	return rv;
+}
+
+struct scsi_host_template csio_fcoe_shost_template = {
+	.module			= THIS_MODULE,
+	.name			= CSIO_DRV_DESC,
+	.proc_name		= KBUILD_MODNAME,
+	.queuecommand		= csio_queuecommand,
+	.eh_abort_handler	= csio_eh_abort_handler,
+	.eh_device_reset_handler = csio_eh_lun_reset_handler,
+	.slave_alloc		= csio_slave_alloc,
+	.slave_configure	= csio_slave_configure,
+	.slave_destroy		= csio_slave_destroy,
+	.scan_finished		= csio_scan_finished,
+	.this_id		= -1,
+	.sg_tablesize		= CSIO_SCSI_MAX_SGE,
+	.cmd_per_lun		= CSIO_MAX_CMD_PER_LUN,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.shost_attrs		= csio_fcoe_lport_attrs,
+	.max_sectors		= CSIO_MAX_SECTOR_SIZE,
+};
+
+struct scsi_host_template csio_fcoe_shost_vport_template = {
+	.module			= THIS_MODULE,
+	.name			= CSIO_DRV_DESC,
+	.proc_name		= KBUILD_MODNAME,
+	.queuecommand		= csio_queuecommand,
+	.eh_abort_handler	= csio_eh_abort_handler,
+	.eh_device_reset_handler = csio_eh_lun_reset_handler,
+	.slave_alloc		= csio_slave_alloc,
+	.slave_configure	= csio_slave_configure,
+	.slave_destroy		= csio_slave_destroy,
+	.scan_finished		= csio_scan_finished,
+	.this_id		= -1,
+	.sg_tablesize		= CSIO_SCSI_MAX_SGE,
+	.cmd_per_lun		= CSIO_MAX_CMD_PER_LUN,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.shost_attrs		= csio_fcoe_vport_attrs,
+	.max_sectors		= CSIO_MAX_SECTOR_SIZE,
+};
+
+/*
+ * csio_scsi_alloc_ddp_bufs - Allocate buffers for DDP of unaligned SGLs.
+ * @scm: SCSI Module
+ * @hw: HW device.
+ * @buf_size: buffer size
+ * @num_buf : Number of buffers.
+ *
+ * This routine allocates DMA buffers required for SCSI Data xfer, if
+ * each SGL buffer for a SCSI Read request posted by SCSI midlayer are
+ * not virtually contiguous.
+ */
+static int
+csio_scsi_alloc_ddp_bufs(struct csio_scsim *scm, struct csio_hw *hw,
+			 int buf_size, int num_buf)
+{
+	int n = 0;
+	struct list_head *tmp;
+	struct csio_dma_buf *ddp_desc = NULL;
+	uint32_t unit_size = 0;
+
+	if (!num_buf)
+		return 0;
+
+	if (!buf_size)
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&scm->ddp_freelist);
+
+	/* Align buf size to page size */
+	buf_size = (buf_size + PAGE_SIZE - 1) & PAGE_MASK;
+	/* Initialize dma descriptors */
+	for (n = 0; n < num_buf; n++) {
+		/* Set unit size to request size */
+		unit_size = buf_size;
+		ddp_desc = kzalloc(sizeof(struct csio_dma_buf), GFP_KERNEL);
+		if (!ddp_desc) {
+			csio_err(hw,
+				 "Failed to allocate ddp descriptors,"
+				 " Num allocated = %d.\n",
+				 scm->stats.n_free_ddp);
+			goto no_mem;
+		}
+
+		/* Allocate Dma buffers for DDP */
+		ddp_desc->vaddr = pci_alloc_consistent(hw->pdev, unit_size,
+							&ddp_desc->paddr);
+		if (!ddp_desc->vaddr) {
+			csio_err(hw,
+				 "SCSI response DMA buffer (ddp) allocation"
+				 " failed!\n");
+			kfree(ddp_desc);
+			goto no_mem;
+		}
+
+		ddp_desc->len = unit_size;
+
+		/* Added it to scsi ddp freelist */
+		list_add_tail(&ddp_desc->list, &scm->ddp_freelist);
+		CSIO_INC_STATS(scm, n_free_ddp);
+	}
+
+	return 0;
+no_mem:
+	/* release dma descs back to freelist and free dma memory */
+	list_for_each(tmp, &scm->ddp_freelist) {
+		ddp_desc = (struct csio_dma_buf *) tmp;
+		tmp = csio_list_prev(tmp);
+		pci_free_consistent(hw->pdev, ddp_desc->len, ddp_desc->vaddr,
+				    ddp_desc->paddr);
+		list_del_init(&ddp_desc->list);
+		kfree(ddp_desc);
+	}
+	scm->stats.n_free_ddp = 0;
+
+	return -ENOMEM;
+}
+
+/*
+ * csio_scsi_free_ddp_bufs - free DDP buffers of unaligned SGLs.
+ * @scm: SCSI Module
+ * @hw: HW device.
+ *
+ * This routine frees ddp buffers.
+ */
+static void
+csio_scsi_free_ddp_bufs(struct csio_scsim *scm, struct csio_hw *hw)
+{
+	struct list_head *tmp;
+	struct csio_dma_buf *ddp_desc;
+
+	/* release dma descs back to freelist and free dma memory */
+	list_for_each(tmp, &scm->ddp_freelist) {
+		ddp_desc = (struct csio_dma_buf *) tmp;
+		tmp = csio_list_prev(tmp);
+		pci_free_consistent(hw->pdev, ddp_desc->len, ddp_desc->vaddr,
+				    ddp_desc->paddr);
+		list_del_init(&ddp_desc->list);
+		kfree(ddp_desc);
+	}
+	scm->stats.n_free_ddp = 0;
+}
+
+/**
+ * csio_scsim_init - Initialize SCSI Module
+ * @scm:	SCSI Module
+ * @hw:		HW module
+ *
+ */
+int
+csio_scsim_init(struct csio_scsim *scm, struct csio_hw *hw)
+{
+	int i;
+	struct csio_ioreq *ioreq;
+	struct csio_dma_buf *dma_buf;
+
+	INIT_LIST_HEAD(&scm->active_q);
+	scm->hw = hw;
+
+	scm->proto_cmd_len = sizeof(struct fcp_cmnd);
+	scm->proto_rsp_len = CSIO_SCSI_RSP_LEN;
+	scm->max_sge = CSIO_SCSI_MAX_SGE;
+
+	spin_lock_init(&scm->freelist_lock);
+
+	/* Pre-allocate ioreqs and initialize them */
+	INIT_LIST_HEAD(&scm->ioreq_freelist);
+	for (i = 0; i < csio_scsi_ioreqs; i++) {
+
+		ioreq = kzalloc(sizeof(struct csio_ioreq), GFP_KERNEL);
+		if (!ioreq) {
+			csio_err(hw,
+				 "I/O request element allocation failed, "
+				 " Num allocated = %d.\n",
+				 scm->stats.n_free_ioreq);
+
+			goto free_ioreq;
+		}
+
+		/* Allocate Dma buffers for Response Payload */
+		dma_buf = &ioreq->dma_buf;
+		dma_buf->vaddr = pci_pool_alloc(hw->scsi_pci_pool, GFP_KERNEL,
+						&dma_buf->paddr);
+		if (!dma_buf->vaddr) {
+			csio_err(hw,
+				 "SCSI response DMA buffer allocation"
+				 " failed!\n");
+			kfree(ioreq);
+			goto free_ioreq;
+		}
+
+		dma_buf->len = scm->proto_rsp_len;
+
+		/* Set state to uninit */
+		csio_init_state(&ioreq->sm, csio_scsis_uninit);
+		INIT_LIST_HEAD(&ioreq->gen_list);
+		init_completion(&ioreq->cmplobj);
+
+		list_add_tail(&ioreq->sm.sm_list, &scm->ioreq_freelist);
+		CSIO_INC_STATS(scm, n_free_ioreq);
+	}
+
+	if (csio_scsi_alloc_ddp_bufs(scm, hw, PAGE_SIZE, csio_ddp_descs))
+		goto free_ioreq;
+
+	return 0;
+
+free_ioreq:
+	/*
+	 * Free up existing allocations, since an error
+	 * from here means we are returning for good
+	 */
+	while (!list_empty(&scm->ioreq_freelist)) {
+		struct csio_sm *tmp;
+
+		tmp = list_first_entry(&scm->ioreq_freelist,
+				       struct csio_sm, sm_list);
+		list_del_init(&tmp->sm_list);
+		ioreq = (struct csio_ioreq *)tmp;
+
+		dma_buf = &ioreq->dma_buf;
+		pci_pool_free(hw->scsi_pci_pool, dma_buf->vaddr,
+			      dma_buf->paddr);
+
+		kfree(ioreq);
+	}
+
+	scm->stats.n_free_ioreq = 0;
+
+	return -ENOMEM;
+}
+
+/**
+ * csio_scsim_exit: Uninitialize SCSI Module
+ * @scm: SCSI Module
+ *
+ */
+void
+csio_scsim_exit(struct csio_scsim *scm)
+{
+	struct csio_ioreq *ioreq;
+	struct csio_dma_buf *dma_buf;
+
+	while (!list_empty(&scm->ioreq_freelist)) {
+		struct csio_sm *tmp;
+
+		tmp = list_first_entry(&scm->ioreq_freelist,
+				       struct csio_sm, sm_list);
+		list_del_init(&tmp->sm_list);
+		ioreq = (struct csio_ioreq *)tmp;
+
+		dma_buf = &ioreq->dma_buf;
+		pci_pool_free(scm->hw->scsi_pci_pool, dma_buf->vaddr,
+			      dma_buf->paddr);
+
+		kfree(ioreq);
+	}
+
+	scm->stats.n_free_ioreq = 0;
+
+	csio_scsi_free_ddp_bufs(scm, scm->hw);
+}
-- 
1.7.1


^ permalink raw reply related

* [v6 PATCH 3/8] csiostor: Chelsio FCoE offload driver submission (headers part 2).
From: Naresh Kumar Inna @ 2012-11-15 17:11 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1352999484-17812-1-git-send-email-naresh@chelsio.com>

This patch contains the second set of the header files for csiostor driver.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
 drivers/scsi/csiostor/csio_lnode.h |  255 ++++++++++++++++++
 drivers/scsi/csiostor/csio_mb.h    |  278 +++++++++++++++++++
 drivers/scsi/csiostor/csio_rnode.h |  141 ++++++++++
 drivers/scsi/csiostor/csio_scsi.h  |  342 ++++++++++++++++++++++++
 drivers/scsi/csiostor/csio_wr.h    |  512 ++++++++++++++++++++++++++++++++++++
 5 files changed, 1528 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_lnode.h
 create mode 100644 drivers/scsi/csiostor/csio_mb.h
 create mode 100644 drivers/scsi/csiostor/csio_rnode.h
 create mode 100644 drivers/scsi/csiostor/csio_scsi.h
 create mode 100644 drivers/scsi/csiostor/csio_wr.h

diff --git a/drivers/scsi/csiostor/csio_lnode.h b/drivers/scsi/csiostor/csio_lnode.h
new file mode 100644
index 0000000..8d84988
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_lnode.h
@@ -0,0 +1,255 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_LNODE_H__
+#define __CSIO_LNODE_H__
+
+#include <linux/kref.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <scsi/fc/fc_els.h>
+
+
+#include "csio_defs.h"
+#include "csio_hw.h"
+
+#define CSIO_FCOE_MAX_NPIV	128
+#define CSIO_FCOE_MAX_RNODES	2048
+
+/* FDMI port attribute unknown speed */
+#define CSIO_HBA_PORTSPEED_UNKNOWN	0x8000
+
+extern int csio_fcoe_rnodes;
+extern int csio_fdmi_enable;
+
+/* State machine evets */
+enum csio_ln_ev {
+	CSIO_LNE_NONE = (uint32_t)0,
+	CSIO_LNE_LINKUP,
+	CSIO_LNE_FAB_INIT_DONE,
+	CSIO_LNE_LINK_DOWN,
+	CSIO_LNE_DOWN_LINK,
+	CSIO_LNE_LOGO,
+	CSIO_LNE_CLOSE,
+	CSIO_LNE_MAX_EVENT,
+};
+
+
+struct csio_fcf_info {
+	struct list_head	list;
+	uint8_t			priority;
+	uint8_t			mac[6];
+	uint8_t			name_id[8];
+	uint8_t			fabric[8];
+	uint16_t		vf_id;
+	uint8_t			vlan_id;
+	uint16_t		max_fcoe_size;
+	uint8_t			fc_map[3];
+	uint32_t		fka_adv;
+	uint32_t		fcfi;
+	uint8_t			get_next:1;
+	uint8_t			link_aff:1;
+	uint8_t			fpma:1;
+	uint8_t			spma:1;
+	uint8_t			login:1;
+	uint8_t			portid;
+	uint8_t			spma_mac[6];
+	struct kref		kref;
+};
+
+/* Defines for flags */
+#define	CSIO_LNF_FIPSUPP		0x00000001	/* Fip Supported */
+#define	CSIO_LNF_NPIVSUPP		0x00000002	/* NPIV supported */
+#define CSIO_LNF_LINK_ENABLE		0x00000004	/* Link enabled */
+#define	CSIO_LNF_FDMI_ENABLE		0x00000008	/* FDMI support */
+
+/* Transport events */
+enum csio_ln_fc_evt {
+	CSIO_LN_FC_LINKUP = 1,
+	CSIO_LN_FC_LINKDOWN,
+	CSIO_LN_FC_RSCN,
+	CSIO_LN_FC_ATTRIB_UPDATE,
+};
+
+/* Lnode stats */
+struct csio_lnode_stats {
+	uint32_t	n_link_up;	/* Link down */
+	uint32_t	n_link_down;	/* Link up */
+	uint32_t	n_err;		/* error */
+	uint32_t	n_err_nomem;	/* memory not available */
+	uint32_t	n_inval_parm;   /* Invalid parameters */
+	uint32_t	n_evt_unexp;	/* unexpected event */
+	uint32_t	n_evt_drop;	/* dropped event */
+	uint32_t	n_rnode_match;  /* matched rnode */
+	uint32_t	n_dev_loss_tmo; /* Device loss timeout */
+	uint32_t	n_fdmi_err;	/* fdmi err */
+	uint32_t	n_evt_fw[RSCN_DEV_LOST];	/* fw events */
+	enum csio_ln_ev	n_evt_sm[CSIO_LNE_MAX_EVENT];	/* State m/c events */
+	uint32_t	n_rnode_alloc;	/* rnode allocated */
+	uint32_t	n_rnode_free;	/* rnode freed */
+	uint32_t	n_rnode_nomem;	/* rnode alloc failure */
+	uint32_t        n_input_requests; /* Input Requests */
+	uint32_t        n_output_requests; /* Output Requests */
+	uint32_t        n_control_requests; /* Control Requests */
+	uint32_t        n_input_bytes; /* Input Bytes */
+	uint32_t        n_output_bytes; /* Output Bytes */
+	uint32_t	rsvd1;
+};
+
+/* Common Lnode params */
+struct csio_lnode_params {
+	uint32_t	ra_tov;
+	uint32_t	fcfi;
+	uint32_t	log_level;	/* Module level for debugging */
+};
+
+struct csio_service_parms {
+	struct fc_els_csp	csp;		/* Common service parms */
+	uint8_t			wwpn[8];	/* WWPN */
+	uint8_t			wwnn[8];	/* WWNN */
+	struct fc_els_cssp	clsp[4];	/* Class service params */
+	uint8_t			vvl[16];	/* Vendor version level */
+};
+
+/* Lnode */
+struct csio_lnode {
+	struct csio_sm		sm;		/* State machine + sibling
+						 * lnode list.
+						 */
+	struct csio_hw		*hwp;		/* Pointer to the HW module */
+	uint8_t			portid;		/* Port ID */
+	uint8_t			rsvd1;
+	uint16_t		rsvd2;
+	uint32_t		dev_num;	/* Device number */
+	uint32_t		flags;		/* Flags */
+	struct list_head	fcf_lsthead;	/* FCF entries */
+	struct csio_fcf_info	*fcfinfo;	/* FCF in use */
+	struct csio_ioreq	*mgmt_req;	/* MGMT request */
+
+	/* FCoE identifiers */
+	uint8_t			mac[6];
+	uint32_t		nport_id;
+	struct csio_service_parms ln_sparm;	/* Service parms */
+
+	/* Firmware identifiers */
+	uint32_t		fcf_flowid;	/*fcf flowid */
+	uint32_t		vnp_flowid;
+	uint16_t		ssn_cnt;	/* Registered Session */
+	uint8_t			cur_evt;	/* Current event */
+	uint8_t			prev_evt;	/* Previous event */
+
+	/* Children */
+	struct list_head	cln_head;	/* Head of the children lnode
+						 * list.
+						 */
+	uint32_t		num_vports;	/* Total NPIV/children LNodes*/
+	struct csio_lnode	*pln;		/* Parent lnode of child
+						 * lnodes.
+						 */
+	struct list_head	cmpl_q;		/* Pending I/Os on this lnode */
+
+	/* Remote node information */
+	struct list_head	rnhead;		/* Head of rnode list */
+	uint32_t		num_reg_rnodes;	/* Number of rnodes registered
+						 * with the host.
+						 */
+	uint32_t		n_scsi_tgts;	/* Number of scsi targets
+						 * found
+						 */
+	uint32_t		last_scan_ntgts;/* Number of scsi targets
+						 * found per last scan.
+						 */
+	uint32_t		tgt_scan_tick;	/* timer started after
+						 * new tgt found
+						 */
+	/* FC transport data */
+	struct fc_vport		*fc_vport;
+	struct fc_host_statistics fch_stats;
+
+	struct csio_lnode_stats stats;		/* Common lnode stats */
+	struct csio_lnode_params params;	/* Common lnode params */
+};
+
+#define	csio_lnode_to_hw(ln)	((ln)->hwp)
+#define csio_root_lnode(ln)	(csio_lnode_to_hw((ln))->rln)
+#define csio_parent_lnode(ln)	((ln)->pln)
+#define	csio_ln_flowid(ln)	((ln)->vnp_flowid)
+#define csio_ln_wwpn(ln)	((ln)->ln_sparm.wwpn)
+#define csio_ln_wwnn(ln)	((ln)->ln_sparm.wwnn)
+
+#define csio_is_root_ln(ln)	(((ln) == csio_root_lnode((ln))) ? 1 : 0)
+#define csio_is_phys_ln(ln)	(((ln)->pln == NULL) ? 1 : 0)
+#define csio_is_npiv_ln(ln)	(((ln)->pln != NULL) ? 1 : 0)
+
+
+#define csio_ln_dbg(_ln, _fmt, ...)	\
+	csio_dbg(_ln->hwp, "%x:%x "_fmt, CSIO_DEVID_HI(_ln), \
+		 CSIO_DEVID_LO(_ln), ##__VA_ARGS__);
+
+#define csio_ln_err(_ln, _fmt, ...)	\
+	csio_err(_ln->hwp, "%x:%x "_fmt, CSIO_DEVID_HI(_ln), \
+		 CSIO_DEVID_LO(_ln), ##__VA_ARGS__);
+
+#define csio_ln_warn(_ln, _fmt, ...)	\
+	csio_warn(_ln->hwp, "%x:%x "_fmt, CSIO_DEVID_HI(_ln), \
+		 CSIO_DEVID_LO(_ln), ##__VA_ARGS__);
+
+/* HW->Lnode notifications */
+enum csio_ln_notify {
+	CSIO_LN_NOTIFY_HWREADY = 1,
+	CSIO_LN_NOTIFY_HWSTOP,
+	CSIO_LN_NOTIFY_HWREMOVE,
+	CSIO_LN_NOTIFY_HWRESET,
+};
+
+void csio_fcoe_fwevt_handler(struct csio_hw *,  __u8 cpl_op, __be64 *);
+int csio_is_lnode_ready(struct csio_lnode *);
+void csio_lnode_state_to_str(struct csio_lnode *ln, int8_t *str);
+struct csio_lnode *csio_lnode_lookup_by_wwpn(struct csio_hw *, uint8_t *);
+int csio_get_phy_port_stats(struct csio_hw *, uint8_t ,
+				      struct fw_fcoe_port_stats *);
+int csio_scan_done(struct csio_lnode *, unsigned long, unsigned long,
+		   unsigned long, unsigned long);
+void csio_notify_lnodes(struct csio_hw *, enum csio_ln_notify);
+void csio_disable_lnodes(struct csio_hw *, uint8_t, bool);
+void csio_lnode_async_event(struct csio_lnode *, enum csio_ln_fc_evt);
+int csio_ln_fdmi_start(struct csio_lnode *, void *);
+int csio_lnode_start(struct csio_lnode *);
+void csio_lnode_stop(struct csio_lnode *);
+void csio_lnode_close(struct csio_lnode *);
+int csio_lnode_init(struct csio_lnode *, struct csio_hw *,
+			      struct csio_lnode *);
+void csio_lnode_exit(struct csio_lnode *);
+
+#endif /* ifndef __CSIO_LNODE_H__ */
diff --git a/drivers/scsi/csiostor/csio_mb.h b/drivers/scsi/csiostor/csio_mb.h
new file mode 100644
index 0000000..1788ea5
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_mb.h
@@ -0,0 +1,278 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_MB_H__
+#define __CSIO_MB_H__
+
+#include <linux/timer.h>
+#include <linux/completion.h>
+
+#include "t4fw_api.h"
+#include "t4fw_api_stor.h"
+#include "csio_defs.h"
+
+#define CSIO_STATS_OFFSET (2)
+#define CSIO_NUM_STATS_PER_MB (6)
+
+struct fw_fcoe_port_cmd_params {
+	uint8_t		portid;
+	uint8_t		idx;
+	uint8_t		nstats;
+};
+
+#define CSIO_DUMP_MB(__hw, __num, __mb)					\
+	csio_dbg(__hw, "\t%llx %llx %llx %llx %llx %llx %llx %llx\n",	\
+		(unsigned long long)csio_rd_reg64(__hw, __mb),		\
+		(unsigned long long)csio_rd_reg64(__hw, __mb + 8),	\
+		(unsigned long long)csio_rd_reg64(__hw, __mb + 16),	\
+		(unsigned long long)csio_rd_reg64(__hw, __mb + 24),	\
+		(unsigned long long)csio_rd_reg64(__hw, __mb + 32),	\
+		(unsigned long long)csio_rd_reg64(__hw, __mb + 40),	\
+		(unsigned long long)csio_rd_reg64(__hw, __mb + 48),	\
+		(unsigned long long)csio_rd_reg64(__hw, __mb + 56))
+
+#define CSIO_MB_MAX_REGS	8
+#define CSIO_MAX_MB_SIZE	64
+#define CSIO_MB_POLL_FREQ	5		/*  5 ms */
+#define CSIO_MB_DEFAULT_TMO	FW_CMD_MAX_TIMEOUT
+
+/* Device master in HELLO command */
+enum csio_dev_master { CSIO_MASTER_CANT, CSIO_MASTER_MAY, CSIO_MASTER_MUST };
+
+enum csio_mb_owner { CSIO_MBOWNER_NONE, CSIO_MBOWNER_FW, CSIO_MBOWNER_PL };
+
+enum csio_dev_state {
+	CSIO_DEV_STATE_UNINIT,
+	CSIO_DEV_STATE_INIT,
+	CSIO_DEV_STATE_ERR
+};
+
+#define FW_PARAM_DEV(param) \
+	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
+	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
+
+#define FW_PARAM_PFVF(param) \
+	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
+	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)|  \
+	 FW_PARAMS_PARAM_Y(0) | \
+	 FW_PARAMS_PARAM_Z(0))
+
+enum {
+	PAUSE_RX      = 1 << 0,
+	PAUSE_TX      = 1 << 1,
+	PAUSE_AUTONEG = 1 << 2
+};
+
+#define CSIO_INIT_MBP(__mbp, __cp,  __tmo, __priv, __fn, __clear)	\
+do {									\
+	if (__clear)							\
+		memset((__cp), 0,					\
+			    CSIO_MB_MAX_REGS * sizeof(__be64));		\
+	INIT_LIST_HEAD(&(__mbp)->list);					\
+	(__mbp)->tmo		= (__tmo);				\
+	(__mbp)->priv		= (void *)(__priv);			\
+	(__mbp)->mb_cbfn	= (__fn);				\
+	(__mbp)->mb_size	= sizeof(*(__cp));			\
+} while (0)
+
+struct csio_mbm_stats {
+	uint32_t	n_req;		/* number of mbox req */
+	uint32_t	n_rsp;		/* number of mbox rsp */
+	uint32_t	n_activeq;	/* number of mbox req active Q */
+	uint32_t	n_cbfnq;	/* number of mbox req cbfn Q */
+	uint32_t	n_tmo;		/* number of mbox timeout */
+	uint32_t	n_cancel;	/* number of mbox cancel */
+	uint32_t	n_err;		/* number of mbox error */
+};
+
+/* Driver version of Mailbox */
+struct csio_mb {
+	struct list_head	list;			/* for req/resp */
+							/* queue in driver */
+	__be64			mb[CSIO_MB_MAX_REGS];	/* MB in HW format */
+	int			mb_size;		/* Size of this
+							 * mailbox.
+							 */
+	uint32_t		tmo;			/* Timeout */
+	struct completion	cmplobj;		/* MB Completion
+							 * object
+							 */
+	void			(*mb_cbfn) (struct csio_hw *, struct csio_mb *);
+							/* Callback fn */
+	void			*priv;			/* Owner private ptr */
+};
+
+struct csio_mbm {
+	uint32_t		a_mbox;			/* Async mbox num */
+	uint32_t		intr_idx;		/* Interrupt index */
+	struct timer_list	timer;			/* Mbox timer */
+	struct list_head	req_q;			/* Mbox request queue */
+	struct list_head	cbfn_q;			/* Mbox completion q */
+	struct csio_mb		*mcurrent;		/* Current mailbox */
+	uint32_t		req_q_cnt;		/* Outstanding mbox
+							 * cmds
+							 */
+	struct csio_mbm_stats	stats;			/* Statistics */
+};
+
+#define csio_set_mb_intr_idx(_m, _i)	((_m)->intr_idx = (_i))
+#define csio_get_mb_intr_idx(_m)	((_m)->intr_idx)
+
+struct csio_iq_params;
+struct csio_eq_params;
+
+enum fw_retval csio_mb_fw_retval(struct csio_mb *);
+
+/* MB helpers */
+void csio_mb_hello(struct csio_hw *, struct csio_mb *, uint32_t,
+		   uint32_t, uint32_t, enum csio_dev_master,
+		   void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_process_hello_rsp(struct csio_hw *, struct csio_mb *,
+			       enum fw_retval *, enum csio_dev_state *,
+			       uint8_t *);
+
+void csio_mb_bye(struct csio_hw *, struct csio_mb *, uint32_t,
+		 void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_reset(struct csio_hw *, struct csio_mb *, uint32_t, int, int,
+		   void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_params(struct csio_hw *, struct csio_mb *, uint32_t, unsigned int,
+		    unsigned int, unsigned int, const u32 *, u32 *, bool,
+		    void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_process_read_params_rsp(struct csio_hw *, struct csio_mb *,
+				enum fw_retval *, unsigned int , u32 *);
+
+void csio_mb_ldst(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+		  int reg);
+
+void csio_mb_caps_config(struct csio_hw *, struct csio_mb *, uint32_t,
+			    bool, bool, bool, bool,
+			    void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_rss_glb_config(struct csio_hw *, struct csio_mb *,
+			 uint32_t, uint8_t, unsigned int,
+			 void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_pfvf(struct csio_hw *, struct csio_mb *, uint32_t,
+		  unsigned int, unsigned int, unsigned int,
+		  unsigned int, unsigned int, unsigned int,
+		  unsigned int, unsigned int, unsigned int,
+		  unsigned int, unsigned int, unsigned int,
+		  unsigned int, void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_port(struct csio_hw *, struct csio_mb *, uint32_t,
+		  uint8_t, bool, uint32_t, uint16_t,
+		  void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_process_read_port_rsp(struct csio_hw *, struct csio_mb *,
+				   enum fw_retval *, uint16_t *);
+
+void csio_mb_initialize(struct csio_hw *, struct csio_mb *, uint32_t,
+			void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_iq_alloc_write(struct csio_hw *, struct csio_mb *, void *,
+			uint32_t, struct csio_iq_params *,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_iq_alloc_write_rsp(struct csio_hw *, struct csio_mb *,
+				enum fw_retval *, struct csio_iq_params *);
+
+void csio_mb_iq_free(struct csio_hw *, struct csio_mb *, void *,
+		     uint32_t, struct csio_iq_params *,
+		     void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_eq_ofld_alloc_write(struct csio_hw *, struct csio_mb *, void *,
+				 uint32_t, struct csio_eq_params *,
+				 void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_eq_ofld_alloc_write_rsp(struct csio_hw *, struct csio_mb *,
+				     enum fw_retval *, struct csio_eq_params *);
+
+void csio_mb_eq_ofld_free(struct csio_hw *, struct csio_mb *, void *,
+			  uint32_t , struct csio_eq_params *,
+			  void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_read_res_info_init_mb(struct csio_hw *, struct csio_mb *,
+			uint32_t,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_write_fcoe_link_cond_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t, uint8_t, uint32_t, uint8_t, bool, uint32_t,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_vnp_alloc_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t, uint32_t , uint32_t , uint16_t,
+			uint8_t [8], uint8_t [8],
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_vnp_read_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t, uint32_t , uint32_t ,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_vnp_free_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t , uint32_t, uint32_t ,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_read_fcf_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t, uint32_t, uint32_t,
+			void (*cbfn) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_read_portparams_init_mb(struct csio_hw *hw,
+			struct csio_mb *mbp, uint32_t mb_tmo,
+			struct fw_fcoe_port_cmd_params *portparams,
+			void (*cbfn)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_process_portparams_rsp(struct csio_hw *hw, struct csio_mb *mbp,
+				enum fw_retval *retval,
+				struct fw_fcoe_port_cmd_params *portparams,
+				struct fw_fcoe_port_stats *portstats);
+
+/* MB module functions */
+int csio_mbm_init(struct csio_mbm *, struct csio_hw *,
+			    void (*)(uintptr_t));
+void csio_mbm_exit(struct csio_mbm *);
+void csio_mb_intr_enable(struct csio_hw *);
+void csio_mb_intr_disable(struct csio_hw *);
+
+int csio_mb_issue(struct csio_hw *, struct csio_mb *);
+void csio_mb_completions(struct csio_hw *, struct list_head *);
+int csio_mb_fwevt_handler(struct csio_hw *, __be64 *);
+int csio_mb_isr_handler(struct csio_hw *);
+struct csio_mb *csio_mb_tmo_handler(struct csio_hw *);
+void csio_mb_cancel_all(struct csio_hw *, struct list_head *);
+
+#endif /* ifndef __CSIO_MB_H__ */
diff --git a/drivers/scsi/csiostor/csio_rnode.h b/drivers/scsi/csiostor/csio_rnode.h
new file mode 100644
index 0000000..a3b434c
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_rnode.h
@@ -0,0 +1,141 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_RNODE_H__
+#define __CSIO_RNODE_H__
+
+#include "csio_defs.h"
+
+/* State machine evets */
+enum csio_rn_ev {
+	CSIO_RNFE_NONE = (uint32_t)0,			/* None */
+	CSIO_RNFE_LOGGED_IN,				/* [N/F]Port login
+							 * complete.
+							 */
+	CSIO_RNFE_PRLI_DONE,				/* PRLI completed */
+	CSIO_RNFE_PLOGI_RECV,				/* Received PLOGI */
+	CSIO_RNFE_PRLI_RECV,				/* Received PLOGI */
+	CSIO_RNFE_LOGO_RECV,				/* Received LOGO */
+	CSIO_RNFE_PRLO_RECV,				/* Received PRLO */
+	CSIO_RNFE_DOWN,					/* Rnode is down */
+	CSIO_RNFE_CLOSE,				/* Close rnode */
+	CSIO_RNFE_NAME_MISSING,				/* Rnode name missing
+							 * in name server.
+							 */
+	CSIO_RNFE_MAX_EVENT,
+};
+
+/* rnode stats */
+struct csio_rnode_stats {
+	uint32_t	n_err;		/* error */
+	uint32_t	n_err_inval;	/* invalid parameter */
+	uint32_t	n_err_nomem;	/* error nomem */
+	uint32_t	n_evt_unexp;	/* unexpected event */
+	uint32_t	n_evt_drop;	/* unexpected event */
+	uint32_t	n_evt_fw[RSCN_DEV_LOST];	/* fw events */
+	enum csio_rn_ev	n_evt_sm[CSIO_RNFE_MAX_EVENT];	/* State m/c events */
+	uint32_t	n_lun_rst;	/* Number of resets of
+					 * of LUNs under this
+					 * target
+					 */
+	uint32_t	n_lun_rst_fail;	/* Number of LUN reset
+					 * failures.
+					 */
+	uint32_t	n_tgt_rst;	/* Number of target resets */
+	uint32_t	n_tgt_rst_fail;	/* Number of target reset
+					 * failures.
+					 */
+};
+
+/* Defines for rnode role */
+#define	CSIO_RNFR_INITIATOR	0x1
+#define	CSIO_RNFR_TARGET	0x2
+#define CSIO_RNFR_FABRIC	0x4
+#define	CSIO_RNFR_NS		0x8
+#define CSIO_RNFR_NPORT		0x10
+
+struct csio_rnode {
+	struct csio_sm		sm;			/* State machine -
+							 * should be the
+							 * 1st member
+							 */
+	struct csio_lnode	*lnp;			/* Pointer to owning
+							 * Lnode */
+	uint32_t		flowid;			/* Firmware ID */
+	struct list_head	host_cmpl_q;		/* SCSI IOs
+							 * pending to completed
+							 * to Mid-layer.
+							 */
+	/* FC identifiers for remote node */
+	uint32_t		nport_id;
+	uint16_t		fcp_flags;		/* FCP Flags */
+	uint8_t			cur_evt;		/* Current event */
+	uint8_t			prev_evt;		/* Previous event */
+	uint32_t		role;			/* Fabric/Target/
+							 * Initiator/NS
+							 */
+	struct fcoe_rdev_entry		*rdev_entry;	/* Rdev entry */
+	struct csio_service_parms	rn_sparm;
+
+	/* FC transport attributes */
+	struct fc_rport		*rport;		/* FC transport rport */
+	uint32_t		supp_classes;	/* Supported FC classes */
+	uint32_t		maxframe_size;	/* Max Frame size */
+	uint32_t		scsi_id;	/* Transport given SCSI id */
+
+	struct csio_rnode_stats	stats;		/* Common rnode stats */
+};
+
+#define csio_rn_flowid(rn)			((rn)->flowid)
+#define csio_rn_wwpn(rn)			((rn)->rn_sparm.wwpn)
+#define csio_rn_wwnn(rn)			((rn)->rn_sparm.wwnn)
+#define csio_rnode_to_lnode(rn)			((rn)->lnp)
+
+int csio_is_rnode_ready(struct csio_rnode *rn);
+void csio_rnode_state_to_str(struct csio_rnode *rn, int8_t *str);
+
+struct csio_rnode *csio_rnode_lookup_portid(struct csio_lnode *, uint32_t);
+struct csio_rnode *csio_confirm_rnode(struct csio_lnode *,
+					  uint32_t, struct fcoe_rdev_entry *);
+
+void csio_rnode_fwevt_handler(struct csio_rnode *rn, uint8_t fwevt);
+
+void csio_put_rnode(struct csio_lnode *ln, struct csio_rnode *rn);
+
+void csio_reg_rnode(struct csio_rnode *);
+void csio_unreg_rnode(struct csio_rnode *);
+
+void csio_rnode_devloss_handler(struct csio_rnode *);
+
+#endif /* ifndef __CSIO_RNODE_H__ */
diff --git a/drivers/scsi/csiostor/csio_scsi.h b/drivers/scsi/csiostor/csio_scsi.h
new file mode 100644
index 0000000..2257c3d
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_scsi.h
@@ -0,0 +1,342 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_SCSI_H__
+#define __CSIO_SCSI_H__
+
+#include <linux/spinlock_types.h>
+#include <linux/completion.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_eh.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/fc/fc_fcp.h>
+
+#include "csio_defs.h"
+#include "csio_wr.h"
+
+extern struct scsi_host_template csio_fcoe_shost_template;
+extern struct scsi_host_template csio_fcoe_shost_vport_template;
+
+extern int csio_scsi_eqsize;
+extern int csio_scsi_iqlen;
+extern int csio_scsi_ioreqs;
+extern uint32_t csio_max_scan_tmo;
+extern uint32_t csio_delta_scan_tmo;
+extern int csio_lun_qdepth;
+
+/*
+ **************************** NOTE *******************************
+ * How do we calculate MAX FCoE SCSI SGEs? Here is the math:
+ * Max Egress WR size = 512 bytes
+ * One SCSI egress WR has the following fixed no of bytes:
+ *      48 (sizeof(struct fw_scsi_write[read]_wr)) - FW WR
+ *    + 32 (sizeof(struct fc_fcp_cmnd)) - Immediate FCP_CMD
+ *    ------
+ *      80
+ *    ------
+ * That leaves us with 512 - 96 = 432 bytes for data SGE. Using
+ * struct ulptx_sgl header for the SGE consumes:
+ *	- 4 bytes for cmnd_sge.
+ *	- 12 bytes for the first SGL.
+ * That leaves us with 416 bytes for the remaining SGE pairs. Which is
+ * is 416 / 24 (size(struct ulptx_sge_pair)) = 17 SGE pairs,
+ * or 34 SGEs. Adding the first SGE fetches us 35 SGEs.
+ */
+#define CSIO_SCSI_MAX_SGE		35
+#define CSIO_SCSI_ABRT_TMO_MS		60000
+#define CSIO_SCSI_LUNRST_TMO_MS		60000
+#define CSIO_SCSI_TM_POLL_MS		2000	/* should be less than
+						 * all TM timeouts.
+						 */
+#define CSIO_SCSI_IQ_WRSZ		128
+#define CSIO_SCSI_IQSIZE		(csio_scsi_iqlen * CSIO_SCSI_IQ_WRSZ)
+
+#define	CSIO_MAX_SNS_LEN		128
+#define	CSIO_SCSI_RSP_LEN	(FCP_RESP_WITH_EXT + 4 + CSIO_MAX_SNS_LEN)
+
+/* Reference to scsi_cmnd */
+#define csio_scsi_cmnd(req)		((req)->scratch1)
+
+struct csio_scsi_stats {
+	uint64_t		n_tot_success;	/* Total number of good I/Os */
+	uint32_t		n_rn_nr_error;	/* No. of remote-node-not-
+						 * ready errors
+						 */
+	uint32_t		n_hw_nr_error;	/* No. of hw-module-not-
+						 * ready errors
+						 */
+	uint32_t		n_dmamap_error;	/* No. of DMA map erros */
+	uint32_t		n_unsupp_sge_error; /* No. of too-many-SGes
+						     * errors.
+						     */
+	uint32_t		n_no_req_error;	/* No. of Out-of-ioreqs error */
+	uint32_t		n_busy_error;	/* No. of -EBUSY errors */
+	uint32_t		n_hosterror;	/* No. of FW_HOSTERROR I/O */
+	uint32_t		n_rsperror;	/* No. of response errors */
+	uint32_t		n_autosense;	/* No. of auto sense replies */
+	uint32_t		n_ovflerror;	/* No. of overflow errors */
+	uint32_t		n_unflerror;	/* No. of underflow errors */
+	uint32_t		n_rdev_nr_error;/* No. of rdev not
+						 * ready errors
+						 */
+	uint32_t		n_rdev_lost_error;/* No. of rdev lost errors */
+	uint32_t		n_rdev_logo_error;/* No. of rdev logo errors */
+	uint32_t		n_link_down_error;/* No. of link down errors */
+	uint32_t		n_no_xchg_error; /* No. no exchange error */
+	uint32_t		n_unknown_error;/* No. of unhandled errors */
+	uint32_t		n_aborted;	/* No. of aborted I/Os */
+	uint32_t		n_abrt_timedout; /* No. of abort timedouts */
+	uint32_t		n_abrt_fail;	/* No. of abort failures */
+	uint32_t		n_abrt_dups;	/* No. of duplicate aborts */
+	uint32_t		n_abrt_race_comp; /* No. of aborts that raced
+						   * with completions.
+						   */
+	uint32_t		n_abrt_busy_error;/* No. of abort failures
+						   * due to -EBUSY.
+						   */
+	uint32_t		n_closed;	/* No. of closed I/Os */
+	uint32_t		n_cls_busy_error; /* No. of close failures
+						   * due to -EBUSY.
+						   */
+	uint32_t		n_active;	/* No. of IOs in active_q */
+	uint32_t		n_tm_active;	/* No. of TMs in active_q */
+	uint32_t		n_wcbfn;	/* No. of I/Os in worker
+						 * cbfn q
+						 */
+	uint32_t		n_free_ioreq;	/* No. of freelist entries */
+	uint32_t		n_free_ddp;	/* No. of DDP freelist */
+	uint32_t		n_unaligned;	/* No. of Unaligned SGls */
+	uint32_t		n_inval_cplop;	/* No. invalid CPL op's in IQ */
+	uint32_t		n_inval_scsiop;	/* No. invalid scsi op's in IQ*/
+};
+
+struct csio_scsim {
+	struct csio_hw		*hw;		/* Pointer to HW moduel */
+	uint8_t			max_sge;	/* Max SGE */
+	uint8_t			proto_cmd_len;	/* Proto specific SCSI
+						 * cmd length
+						 */
+	uint16_t		proto_rsp_len;	/* Proto specific SCSI
+						 * response length
+						 */
+	spinlock_t		freelist_lock;	/* Lock for ioreq freelist */
+	struct list_head	active_q;	/* Outstanding SCSI I/Os */
+	struct list_head	ioreq_freelist;	/* Free list of ioreq's */
+	struct list_head	ddp_freelist;	/* DDP descriptor freelist */
+	struct csio_scsi_stats	stats;		/* This module's statistics */
+};
+
+/* State machine defines */
+enum csio_scsi_ev {
+	CSIO_SCSIE_START_IO = 1,		/* Start a regular SCSI IO */
+	CSIO_SCSIE_START_TM,			/* Start a TM IO */
+	CSIO_SCSIE_COMPLETED,			/* IO Completed */
+	CSIO_SCSIE_ABORT,			/* Abort IO */
+	CSIO_SCSIE_ABORTED,			/* IO Aborted */
+	CSIO_SCSIE_CLOSE,			/* Close exchange */
+	CSIO_SCSIE_CLOSED,			/* Exchange closed */
+	CSIO_SCSIE_DRVCLEANUP,			/* Driver wants to manually
+						 * cleanup this I/O.
+						 */
+};
+
+enum csio_scsi_lev {
+	CSIO_LEV_ALL = 1,
+	CSIO_LEV_LNODE,
+	CSIO_LEV_RNODE,
+	CSIO_LEV_LUN,
+};
+
+struct csio_scsi_level_data {
+	enum csio_scsi_lev	level;
+	struct csio_rnode	*rnode;
+	struct csio_lnode	*lnode;
+	uint64_t		oslun;
+};
+
+static inline struct csio_ioreq *
+csio_get_scsi_ioreq(struct csio_scsim *scm)
+{
+	struct csio_sm *req;
+
+	if (likely(!list_empty(&scm->ioreq_freelist))) {
+		req = list_first_entry(&scm->ioreq_freelist,
+				       struct csio_sm, sm_list);
+		list_del_init(&req->sm_list);
+		CSIO_DEC_STATS(scm, n_free_ioreq);
+		return (struct csio_ioreq *)req;
+	} else
+		return NULL;
+}
+
+static inline void
+csio_put_scsi_ioreq(struct csio_scsim *scm, struct csio_ioreq *ioreq)
+{
+	list_add_tail(&ioreq->sm.sm_list, &scm->ioreq_freelist);
+	CSIO_INC_STATS(scm, n_free_ioreq);
+}
+
+static inline void
+csio_put_scsi_ioreq_list(struct csio_scsim *scm, struct list_head *reqlist,
+			 int n)
+{
+	list_splice_init(reqlist, &scm->ioreq_freelist);
+	scm->stats.n_free_ioreq += n;
+}
+
+static inline struct csio_dma_buf *
+csio_get_scsi_ddp(struct csio_scsim *scm)
+{
+	struct csio_dma_buf *ddp;
+
+	if (likely(!list_empty(&scm->ddp_freelist))) {
+		ddp = list_first_entry(&scm->ddp_freelist,
+				       struct csio_dma_buf, list);
+		list_del_init(&ddp->list);
+		CSIO_DEC_STATS(scm, n_free_ddp);
+		return ddp;
+	} else
+		return NULL;
+}
+
+static inline void
+csio_put_scsi_ddp(struct csio_scsim *scm, struct csio_dma_buf *ddp)
+{
+	list_add_tail(&ddp->list, &scm->ddp_freelist);
+	CSIO_INC_STATS(scm, n_free_ddp);
+}
+
+static inline void
+csio_put_scsi_ddp_list(struct csio_scsim *scm, struct list_head *reqlist,
+			 int n)
+{
+	list_splice_tail_init(reqlist, &scm->ddp_freelist);
+	scm->stats.n_free_ddp += n;
+}
+
+static inline void
+csio_scsi_completed(struct csio_ioreq *ioreq, struct list_head *cbfn_q)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_COMPLETED);
+	if (csio_list_deleted(&ioreq->sm.sm_list))
+		list_add_tail(&ioreq->sm.sm_list, cbfn_q);
+}
+
+static inline void
+csio_scsi_aborted(struct csio_ioreq *ioreq, struct list_head *cbfn_q)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_ABORTED);
+	list_add_tail(&ioreq->sm.sm_list, cbfn_q);
+}
+
+static inline void
+csio_scsi_closed(struct csio_ioreq *ioreq, struct list_head *cbfn_q)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_CLOSED);
+	list_add_tail(&ioreq->sm.sm_list, cbfn_q);
+}
+
+static inline void
+csio_scsi_drvcleanup(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_DRVCLEANUP);
+}
+
+/*
+ * csio_scsi_start_io - Kick starts the IO SM.
+ * @req: io request SM.
+ *
+ * needs to be called with lock held.
+ */
+static inline int
+csio_scsi_start_io(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_START_IO);
+	return ioreq->drv_status;
+}
+
+/*
+ * csio_scsi_start_tm - Kicks off the Task management IO SM.
+ * @req: io request SM.
+ *
+ * needs to be called with lock held.
+ */
+static inline int
+csio_scsi_start_tm(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_START_TM);
+	return ioreq->drv_status;
+}
+
+/*
+ * csio_scsi_abort - Abort an IO request
+ * @req: io request SM.
+ *
+ * needs to be called with lock held.
+ */
+static inline int
+csio_scsi_abort(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_ABORT);
+	return ioreq->drv_status;
+}
+
+/*
+ * csio_scsi_close - Close an IO request
+ * @req: io request SM.
+ *
+ * needs to be called with lock held.
+ */
+static inline int
+csio_scsi_close(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_CLOSE);
+	return ioreq->drv_status;
+}
+
+void csio_scsi_cleanup_io_q(struct csio_scsim *, struct list_head *);
+int csio_scsim_cleanup_io(struct csio_scsim *, bool abort);
+int csio_scsim_cleanup_io_lnode(struct csio_scsim *,
+					  struct csio_lnode *);
+struct csio_ioreq *csio_scsi_cmpl_handler(struct csio_hw *, void *, uint32_t,
+					  struct csio_fl_dma_buf *,
+					  void *, uint8_t **);
+int csio_scsi_qconfig(struct csio_hw *);
+int csio_scsim_init(struct csio_scsim *, struct csio_hw *);
+void csio_scsim_exit(struct csio_scsim *);
+
+#endif /* __CSIO_SCSI_H__ */
diff --git a/drivers/scsi/csiostor/csio_wr.h b/drivers/scsi/csiostor/csio_wr.h
new file mode 100644
index 0000000..8d30e7a
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_wr.h
@@ -0,0 +1,512 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_WR_H__
+#define __CSIO_WR_H__
+
+#include <linux/cache.h>
+
+#include "csio_defs.h"
+#include "t4fw_api.h"
+#include "t4fw_api_stor.h"
+
+/*
+ * SGE register field values.
+ */
+#define X_INGPCIEBOUNDARY_32B		0
+#define X_INGPCIEBOUNDARY_64B		1
+#define X_INGPCIEBOUNDARY_128B		2
+#define X_INGPCIEBOUNDARY_256B		3
+#define X_INGPCIEBOUNDARY_512B		4
+#define X_INGPCIEBOUNDARY_1024B		5
+#define X_INGPCIEBOUNDARY_2048B		6
+#define X_INGPCIEBOUNDARY_4096B		7
+
+/* GTS register */
+#define X_TIMERREG_COUNTER0		0
+#define X_TIMERREG_COUNTER1		1
+#define X_TIMERREG_COUNTER2		2
+#define X_TIMERREG_COUNTER3		3
+#define X_TIMERREG_COUNTER4		4
+#define X_TIMERREG_COUNTER5		5
+#define X_TIMERREG_RESTART_COUNTER	6
+#define X_TIMERREG_UPDATE_CIDX		7
+
+/*
+ * Egress Context field values
+ */
+#define X_FETCHBURSTMIN_16B		0
+#define X_FETCHBURSTMIN_32B		1
+#define X_FETCHBURSTMIN_64B		2
+#define X_FETCHBURSTMIN_128B		3
+
+#define X_FETCHBURSTMAX_64B		0
+#define X_FETCHBURSTMAX_128B		1
+#define X_FETCHBURSTMAX_256B		2
+#define X_FETCHBURSTMAX_512B		3
+
+#define X_HOSTFCMODE_NONE		0
+#define X_HOSTFCMODE_INGRESS_QUEUE	1
+#define X_HOSTFCMODE_STATUS_PAGE	2
+#define X_HOSTFCMODE_BOTH		3
+
+/*
+ * Ingress Context field values
+ */
+#define X_UPDATESCHEDULING_TIMER	0
+#define X_UPDATESCHEDULING_COUNTER_OPTTIMER	1
+
+#define X_UPDATEDELIVERY_NONE		0
+#define X_UPDATEDELIVERY_INTERRUPT	1
+#define X_UPDATEDELIVERY_STATUS_PAGE	2
+#define X_UPDATEDELIVERY_BOTH		3
+
+#define X_INTERRUPTDESTINATION_PCIE	0
+#define X_INTERRUPTDESTINATION_IQ	1
+
+#define X_RSPD_TYPE_FLBUF		0
+#define X_RSPD_TYPE_CPL			1
+#define X_RSPD_TYPE_INTR		2
+
+/* WR status is at the same position as retval in a CMD header */
+#define csio_wr_status(_wr)		\
+		(FW_CMD_RETVAL_GET(ntohl(((struct fw_cmd_hdr *)(_wr))->lo)))
+
+struct csio_hw;
+
+extern int csio_intr_coalesce_cnt;
+extern int csio_intr_coalesce_time;
+
+/* Ingress queue params */
+struct csio_iq_params {
+
+	uint8_t		iq_start:1;
+	uint8_t		iq_stop:1;
+	uint8_t		pfn:3;
+
+	uint8_t		vfn;
+
+	uint16_t	physiqid;
+	uint16_t	iqid;
+
+	uint16_t	fl0id;
+	uint16_t	fl1id;
+
+	uint8_t		viid;
+
+	uint8_t		type;
+	uint8_t		iqasynch;
+	uint8_t		reserved4;
+
+	uint8_t		iqandst;
+	uint8_t		iqanus;
+	uint8_t		iqanud;
+
+	uint16_t	iqandstindex;
+
+	uint8_t		iqdroprss;
+	uint8_t		iqpciech;
+	uint8_t		iqdcaen;
+
+	uint8_t		iqdcacpu;
+	uint8_t		iqintcntthresh;
+	uint8_t		iqo;
+
+	uint8_t		iqcprio;
+	uint8_t		iqesize;
+
+	uint16_t	iqsize;
+
+	uint64_t	iqaddr;
+
+	uint8_t		iqflintiqhsen;
+	uint8_t		reserved5;
+	uint8_t		iqflintcongen;
+	uint8_t		iqflintcngchmap;
+
+	uint32_t	reserved6;
+
+	uint8_t		fl0hostfcmode;
+	uint8_t		fl0cprio;
+	uint8_t		fl0paden;
+	uint8_t		fl0packen;
+	uint8_t		fl0congen;
+	uint8_t		fl0dcaen;
+
+	uint8_t		fl0dcacpu;
+	uint8_t		fl0fbmin;
+
+	uint8_t		fl0fbmax;
+	uint8_t		fl0cidxfthresho;
+	uint8_t		fl0cidxfthresh;
+
+	uint16_t	fl0size;
+
+	uint64_t	fl0addr;
+
+	uint64_t	reserved7;
+
+	uint8_t		fl1hostfcmode;
+	uint8_t		fl1cprio;
+	uint8_t		fl1paden;
+	uint8_t		fl1packen;
+	uint8_t		fl1congen;
+	uint8_t		fl1dcaen;
+
+	uint8_t		fl1dcacpu;
+	uint8_t		fl1fbmin;
+
+	uint8_t		fl1fbmax;
+	uint8_t		fl1cidxfthresho;
+	uint8_t		fl1cidxfthresh;
+
+	uint16_t	fl1size;
+
+	uint64_t	fl1addr;
+};
+
+/* Egress queue params */
+struct csio_eq_params {
+
+	uint8_t		pfn;
+	uint8_t		vfn;
+
+	uint8_t		eqstart:1;
+	uint8_t		eqstop:1;
+
+	uint16_t        physeqid;
+	uint32_t	eqid;
+
+	uint8_t		hostfcmode:2;
+	uint8_t		cprio:1;
+	uint8_t		pciechn:3;
+
+	uint16_t	iqid;
+
+	uint8_t		dcaen:1;
+	uint8_t		dcacpu:5;
+
+	uint8_t		fbmin:3;
+	uint8_t		fbmax:3;
+
+	uint8_t		cidxfthresho:1;
+	uint8_t		cidxfthresh:3;
+
+	uint16_t	eqsize;
+
+	uint64_t	eqaddr;
+};
+
+struct csio_dma_buf {
+	struct list_head	list;
+	void			*vaddr;		/* Virtual address */
+	dma_addr_t		paddr;		/* Physical address */
+	uint32_t		len;		/* Buffer size */
+};
+
+/* Generic I/O request structure */
+struct csio_ioreq {
+	struct csio_sm		sm;		/* SM, List
+						 * should be the first member
+						 */
+	int			iq_idx;		/* Ingress queue index */
+	int			eq_idx;		/* Egress queue index */
+	uint32_t		nsge;		/* Number of SG elements */
+	uint32_t		tmo;		/* Driver timeout */
+	uint32_t		datadir;	/* Data direction */
+	struct csio_dma_buf	dma_buf;	/* Req/resp DMA buffers */
+	uint16_t		wr_status;	/* WR completion status */
+	int16_t			drv_status;	/* Driver internal status */
+	struct csio_lnode	*lnode;		/* Owner lnode */
+	struct csio_rnode	*rnode;		/* Src/destination rnode */
+	void (*io_cbfn) (struct csio_hw *, struct csio_ioreq *);
+						/* completion callback */
+	void			*scratch1;	/* Scratch area 1.
+						 */
+	void			*scratch2;	/* Scratch area 2. */
+	struct list_head	gen_list;	/* Any list associated with
+						 * this ioreq.
+						 */
+	uint64_t		fw_handle;	/* Unique handle passed
+						 * to FW
+						 */
+	uint8_t			dcopy;		/* Data copy required */
+	uint8_t			reserved1;
+	uint16_t		reserved2;
+	struct completion	cmplobj;	/* ioreq completion object */
+} ____cacheline_aligned_in_smp;
+
+/*
+ * Egress status page for egress cidx updates
+ */
+struct csio_qstatus_page {
+	__be32 qid;
+	__be16 cidx;
+	__be16 pidx;
+};
+
+
+enum {
+	CSIO_MAX_FLBUF_PER_IQWR = 4,
+	CSIO_QCREDIT_SZ  = 64,			/* pidx/cidx increments
+						 * in bytes
+						 */
+	CSIO_MAX_QID = 0xFFFF,
+	CSIO_MAX_IQ = 128,
+
+	CSIO_SGE_NTIMERS = 6,
+	CSIO_SGE_NCOUNTERS = 4,
+	CSIO_SGE_FL_SIZE_REGS = 16,
+};
+
+/* Defines for type */
+enum {
+	CSIO_EGRESS	= 1,
+	CSIO_INGRESS	= 2,
+	CSIO_FREELIST	= 3,
+};
+
+/*
+ * Structure for footer (last 2 flits) of Ingress Queue Entry.
+ */
+struct csio_iqwr_footer {
+	__be32			hdrbuflen_pidx;
+	__be32			pldbuflen_qid;
+	union {
+		u8		type_gen;
+		__be64		last_flit;
+	} u;
+};
+
+#define IQWRF_NEWBUF		(1 << 31)
+#define IQWRF_LEN_GET(x)	(((x) >> 0) & 0x7fffffffU)
+#define IQWRF_GEN_SHIFT		7
+#define IQWRF_TYPE_GET(x)	(((x) >> 4) & 0x3U)
+
+
+/*
+ * WR pair:
+ * ========
+ * A WR can start towards the end of a queue, and then continue at the
+ * beginning, since the queue is considered to be circular. This will
+ * require a pair of address/len to be passed back to the caller -
+ * hence the Work request pair structure.
+ */
+struct csio_wr_pair {
+	void			*addr1;
+	uint32_t		size1;
+	void			*addr2;
+	uint32_t		size2;
+};
+
+/*
+ * The following structure is used by ingress processing to return the
+ * free list buffers to consumers.
+ */
+struct csio_fl_dma_buf {
+	struct csio_dma_buf	flbufs[CSIO_MAX_FLBUF_PER_IQWR];
+						/* Freelist DMA buffers */
+	int			offset;		/* Offset within the
+						 * first FL buf.
+						 */
+	uint32_t		totlen;		/* Total length */
+	uint8_t			defer_free;	/* Free of buffer can
+						 * deferred
+						 */
+};
+
+/* Data-types */
+typedef void (*iq_handler_t)(struct csio_hw *, void *, uint32_t,
+			     struct csio_fl_dma_buf *, void *);
+
+struct csio_iq {
+	uint16_t		iqid;		/* Queue ID */
+	uint16_t		physiqid;	/* Physical Queue ID */
+	uint16_t		genbit;		/* Generation bit,
+						 * initially set to 1
+						 */
+	int			flq_idx;	/* Freelist queue index */
+	iq_handler_t		iq_intx_handler; /* IQ INTx handler routine */
+};
+
+struct csio_eq {
+	uint16_t		eqid;		/* Qid */
+	uint16_t		physeqid;	/* Physical Queue ID */
+	uint8_t			wrap[512];	/* Temp area for q-wrap around*/
+};
+
+struct csio_fl {
+	uint16_t		flid;		/* Qid */
+	uint16_t		packen;		/* Packing enabled? */
+	int			offset;		/* Offset within FL buf */
+	int			sreg;		/* Size register */
+	struct csio_dma_buf	*bufs;		/* Free list buffer ptr array
+						 * indexed using flq->cidx/pidx
+						 */
+};
+
+struct csio_qstats {
+	uint32_t	n_tot_reqs;		/* Total no. of Requests */
+	uint32_t	n_tot_rsps;		/* Total no. of responses */
+	uint32_t	n_qwrap;		/* Queue wraps */
+	uint32_t	n_eq_wr_split;		/* Number of split EQ WRs */
+	uint32_t	n_qentry;		/* Queue entry */
+	uint32_t	n_qempty;		/* Queue empty */
+	uint32_t	n_qfull;		/* Queue fulls */
+	uint32_t	n_rsp_unknown;		/* Unknown response type */
+	uint32_t	n_stray_comp;		/* Stray completion intr */
+	uint32_t	n_flq_refill;		/* Number of FL refills */
+};
+
+/* Queue metadata */
+struct csio_q {
+	uint16_t		type;		/* Type: Ingress/Egress/FL */
+	uint16_t		pidx;		/* producer index */
+	uint16_t		cidx;		/* consumer index */
+	uint16_t		inc_idx;	/* Incremental index */
+	uint32_t		wr_sz;		/* Size of all WRs in this q
+						 * if fixed
+						 */
+	void			*vstart;	/* Base virtual address
+						 * of queue
+						 */
+	void			*vwrap;		/* Virtual end address to
+						 * wrap around at
+						 */
+	uint32_t		credits;	/* Size of queue in credits */
+	void			*owner;		/* Owner */
+	union {					/* Queue contexts */
+		struct csio_iq	iq;
+		struct csio_eq	eq;
+		struct csio_fl	fl;
+	} un;
+
+	dma_addr_t		pstart;		/* Base physical address of
+						 * queue
+						 */
+	uint32_t		portid;		/* PCIE Channel */
+	uint32_t		size;		/* Size of queue in bytes */
+	struct csio_qstats	stats;		/* Statistics */
+} ____cacheline_aligned_in_smp;
+
+struct csio_sge {
+	uint32_t	csio_fl_align;		/* Calculated and cached
+						 * for fast path
+						 */
+	uint32_t	sge_control;		/* padding, boundaries,
+						 * lengths, etc.
+						 */
+	uint32_t	sge_host_page_size;	/* Host page size */
+	uint32_t	sge_fl_buf_size[CSIO_SGE_FL_SIZE_REGS];
+						/* free list buffer sizes */
+	uint16_t	timer_val[CSIO_SGE_NTIMERS];
+	uint8_t		counter_val[CSIO_SGE_NCOUNTERS];
+};
+
+/* Work request module */
+struct csio_wrm {
+	int			num_q;		/* Number of queues */
+	struct csio_q		**q_arr;	/* Array of queue pointers
+						 * allocated dynamically
+						 * based on configured values
+						 */
+	uint32_t		fw_iq_start;	/* Start ID of IQ for this fn*/
+	uint32_t		fw_eq_start;	/* Start ID of EQ for this fn*/
+	struct csio_q		*intr_map[CSIO_MAX_IQ];
+						/* IQ-id to IQ map table. */
+	int			free_qidx;	/* queue idx of free queue */
+	struct csio_sge		sge;		/* SGE params */
+};
+
+#define csio_get_q(__hw, __idx)		((__hw)->wrm.q_arr[__idx])
+#define	csio_q_type(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->type)
+#define	csio_q_pidx(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->pidx)
+#define	csio_q_cidx(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->cidx)
+#define	csio_q_inc_idx(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->inc_idx)
+#define	csio_q_vstart(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->vstart)
+#define	csio_q_pstart(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->pstart)
+#define	csio_q_size(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->size)
+#define	csio_q_credits(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->credits)
+#define	csio_q_portid(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->portid)
+#define	csio_q_wr_sz(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->wr_sz)
+#define	csio_q_iqid(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->un.iq.iqid)
+#define csio_q_physiqid(__hw, __idx)					\
+				((__hw)->wrm.q_arr[(__idx)]->un.iq.physiqid)
+#define csio_q_iq_flq_idx(__hw, __idx)					\
+				((__hw)->wrm.q_arr[(__idx)]->un.iq.flq_idx)
+#define	csio_q_eqid(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->un.eq.eqid)
+#define	csio_q_flid(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->un.fl.flid)
+
+#define csio_q_physeqid(__hw, __idx)					\
+				((__hw)->wrm.q_arr[(__idx)]->un.eq.physeqid)
+#define csio_iq_has_fl(__iq)		((__iq)->un.iq.flq_idx != -1)
+
+#define csio_q_iq_to_flid(__hw, __iq_idx)				\
+	csio_q_flid((__hw), (__hw)->wrm.q_arr[(__iq_qidx)]->un.iq.flq_idx)
+#define csio_q_set_intr_map(__hw, __iq_idx, __rel_iq_id)		\
+		(__hw)->wrm.intr_map[__rel_iq_id] = csio_get_q(__hw, __iq_idx)
+#define	csio_q_eq_wrap(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->un.eq.wrap)
+
+struct csio_mb;
+
+int csio_wr_alloc_q(struct csio_hw *, uint32_t, uint32_t,
+		    uint16_t, void *, uint32_t, int, iq_handler_t);
+int csio_wr_iq_create(struct csio_hw *, void *, int,
+				uint32_t, uint8_t, bool,
+				void (*)(struct csio_hw *, struct csio_mb *));
+int csio_wr_eq_create(struct csio_hw *, void *, int, int, uint8_t,
+				void (*)(struct csio_hw *, struct csio_mb *));
+int csio_wr_destroy_queues(struct csio_hw *, bool cmd);
+
+
+int csio_wr_get(struct csio_hw *, int, uint32_t,
+			  struct csio_wr_pair *);
+void csio_wr_copy_to_wrp(void *, struct csio_wr_pair *, uint32_t, uint32_t);
+int csio_wr_issue(struct csio_hw *, int, bool);
+int csio_wr_process_iq(struct csio_hw *, struct csio_q *,
+				 void (*)(struct csio_hw *, void *,
+					  uint32_t, struct csio_fl_dma_buf *,
+					  void *),
+				 void *);
+int csio_wr_process_iq_idx(struct csio_hw *, int,
+				 void (*)(struct csio_hw *, void *,
+					  uint32_t, struct csio_fl_dma_buf *,
+					  void *),
+				 void *);
+
+void csio_wr_sge_init(struct csio_hw *);
+int csio_wrm_init(struct csio_wrm *, struct csio_hw *);
+void csio_wrm_exit(struct csio_wrm *, struct csio_hw *);
+
+#endif /* ifndef __CSIO_WR_H__ */
-- 
1.7.1


^ permalink raw reply related

* [v6 PATCH 1/8] cxgb4/cxgb4vf: Chelsio FCoE offload driver submission (common header updates).
From: Naresh Kumar Inna @ 2012-11-15 17:11 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1352999484-17812-1-git-send-email-naresh@chelsio.com>

This patch contains updates to firmware/hardware header files shared
between csiostor and cxgb4/cxgb4vf, and the resulting changes to the
cxgb4/cxgb4vf source files.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |   10 +++---
 drivers/net/ethernet/chelsio/cxgb4/sge.c        |    6 ++--
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c      |   20 +++++-----
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h     |    1 +
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h    |   36 +++++++++++++++++++-
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h   |   41 ++++++++++++++++++++---
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c      |    8 ++--
 7 files changed, 93 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 0df1284..17ab96d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3203,7 +3203,7 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
 	memset(c, 0, sizeof(*c));
 	c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 			       FW_CMD_REQUEST | FW_CMD_READ);
-	c->retval_len16 = htonl(FW_LEN16(*c));
+	c->cfvalid_to_len16 = htonl(FW_LEN16(*c));
 	ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
 	if (ret < 0)
 		return ret;
@@ -3397,7 +3397,7 @@ static int adap_init0_config(struct adapter *adapter, int reset)
 		htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 		      FW_CMD_REQUEST |
 		      FW_CMD_READ);
-	caps_cmd.retval_len16 =
+	caps_cmd.cfvalid_to_len16 =
 		htonl(FW_CAPS_CONFIG_CMD_CFVALID |
 		      FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
 		      FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) |
@@ -3422,7 +3422,7 @@ static int adap_init0_config(struct adapter *adapter, int reset)
 		htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 		      FW_CMD_REQUEST |
 		      FW_CMD_WRITE);
-	caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+	caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
 	ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
 			 NULL);
 	if (ret < 0)
@@ -3497,7 +3497,7 @@ static int adap_init0_no_config(struct adapter *adapter, int reset)
 	memset(&caps_cmd, 0, sizeof(caps_cmd));
 	caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 				     FW_CMD_REQUEST | FW_CMD_READ);
-	caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+	caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
 	ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
 			 &caps_cmd);
 	if (ret < 0)
@@ -3929,7 +3929,7 @@ static int adap_init0(struct adapter *adap)
 	memset(&caps_cmd, 0, sizeof(caps_cmd));
 	caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 				     FW_CMD_REQUEST | FW_CMD_READ);
-	caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+	caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
 	ret = t4_wr_mbox(adap, adap->mbox, &caps_cmd, sizeof(caps_cmd),
 			 &caps_cmd);
 	if (ret < 0)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 3ecc087..fe9a2ea 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -508,7 +508,7 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 {
 	if (q->pend_cred >= 8) {
 		wmb();
-		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO |
+		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) |
 			     QID(q->cntxt_id) | PIDX(q->pend_cred / 8));
 		q->pend_cred &= 7;
 	}
@@ -2082,10 +2082,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 			goto fl_nomem;
 
 		flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
-		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN |
+		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN(1) |
 					    FW_IQ_CMD_FL0FETCHRO(1) |
 					    FW_IQ_CMD_FL0DATARO(1) |
-					    FW_IQ_CMD_FL0PADEN);
+					    FW_IQ_CMD_FL0PADEN(1));
 		c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN(2) |
 				FW_IQ_CMD_FL0FBMAX(3));
 		c.fl0size = htons(flsz);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 730ae2c..137e1f8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -648,12 +648,12 @@ static int sf1_read(struct adapter *adapter, unsigned int byte_cnt, int cont,
 
 	if (!byte_cnt || byte_cnt > 4)
 		return -EINVAL;
-	if (t4_read_reg(adapter, SF_OP) & BUSY)
+	if (t4_read_reg(adapter, SF_OP) & SF_BUSY)
 		return -EBUSY;
 	cont = cont ? SF_CONT : 0;
 	lock = lock ? SF_LOCK : 0;
 	t4_write_reg(adapter, SF_OP, lock | cont | BYTECNT(byte_cnt - 1));
-	ret = t4_wait_op_done(adapter, SF_OP, BUSY, 0, SF_ATTEMPTS, 5);
+	ret = t4_wait_op_done(adapter, SF_OP, SF_BUSY, 0, SF_ATTEMPTS, 5);
 	if (!ret)
 		*valp = t4_read_reg(adapter, SF_DATA);
 	return ret;
@@ -676,14 +676,14 @@ static int sf1_write(struct adapter *adapter, unsigned int byte_cnt, int cont,
 {
 	if (!byte_cnt || byte_cnt > 4)
 		return -EINVAL;
-	if (t4_read_reg(adapter, SF_OP) & BUSY)
+	if (t4_read_reg(adapter, SF_OP) & SF_BUSY)
 		return -EBUSY;
 	cont = cont ? SF_CONT : 0;
 	lock = lock ? SF_LOCK : 0;
 	t4_write_reg(adapter, SF_DATA, val);
 	t4_write_reg(adapter, SF_OP, lock |
 		     cont | BYTECNT(byte_cnt - 1) | OP_WR);
-	return t4_wait_op_done(adapter, SF_OP, BUSY, 0, SF_ATTEMPTS, 5);
+	return t4_wait_op_done(adapter, SF_OP, SF_BUSY, 0, SF_ATTEMPTS, 5);
 }
 
 /**
@@ -2252,14 +2252,14 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map,
 		t4_write_reg(adap, EPIO_REG(DATA0), mask0);
 		t4_write_reg(adap, EPIO_REG(OP), ADDRESS(i) | EPIOWR);
 		t4_read_reg(adap, EPIO_REG(OP));                /* flush */
-		if (t4_read_reg(adap, EPIO_REG(OP)) & BUSY)
+		if (t4_read_reg(adap, EPIO_REG(OP)) & SF_BUSY)
 			return -ETIMEDOUT;
 
 		/* write CRC */
 		t4_write_reg(adap, EPIO_REG(DATA0), crc);
 		t4_write_reg(adap, EPIO_REG(OP), ADDRESS(i + 32) | EPIOWR);
 		t4_read_reg(adap, EPIO_REG(OP));                /* flush */
-		if (t4_read_reg(adap, EPIO_REG(OP)) & BUSY)
+		if (t4_read_reg(adap, EPIO_REG(OP)) & SF_BUSY)
 			return -ETIMEDOUT;
 	}
 #undef EPIO_REG
@@ -2405,7 +2405,7 @@ int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox,
 retry:
 	memset(&c, 0, sizeof(c));
 	INIT_CMD(c, HELLO, WRITE);
-	c.err_to_mbasyncnot = htonl(
+	c.err_to_clearinit = htonl(
 		FW_HELLO_CMD_MASTERDIS(master == MASTER_CANT) |
 		FW_HELLO_CMD_MASTERFORCE(master == MASTER_MUST) |
 		FW_HELLO_CMD_MBMASTER(master == MASTER_MUST ? mbox :
@@ -2426,7 +2426,7 @@ retry:
 		return ret;
 	}
 
-	v = ntohl(c.err_to_mbasyncnot);
+	v = ntohl(c.err_to_clearinit);
 	master_mbox = FW_HELLO_CMD_MBMASTER_GET(v);
 	if (state) {
 		if (v & FW_HELLO_CMD_ERR)
@@ -2774,7 +2774,7 @@ int t4_fw_config_file(struct adapter *adap, unsigned int mbox,
 		htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 		      FW_CMD_REQUEST |
 		      FW_CMD_READ);
-	caps_cmd.retval_len16 =
+	caps_cmd.cfvalid_to_len16 =
 		htonl(FW_CAPS_CONFIG_CMD_CFVALID |
 		      FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
 		      FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) |
@@ -2797,7 +2797,7 @@ int t4_fw_config_file(struct adapter *adap, unsigned int mbox,
 		htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 		      FW_CMD_REQUEST |
 		      FW_CMD_WRITE);
-	caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd));
+	caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
 	return t4_wr_mbox(adap, mbox, &caps_cmd, sizeof(caps_cmd), NULL);
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index eb71b82..b760808 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -658,6 +658,7 @@ struct ulptx_sgl {
 	__be32 cmd_nsge;
 #define ULPTX_CMD(x) ((x) << 24)
 #define ULPTX_NSGE(x) ((x) << 0)
+#define ULPTX_MORE (1U << 23)
 	__be32 len0;
 	__be64 addr0;
 	struct ulptx_sge_pair sge[0];
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index a1a8b57..75393f5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -67,7 +67,7 @@
 #define  QID_MASK    0xffff8000U
 #define  QID_SHIFT   15
 #define  QID(x)      ((x) << QID_SHIFT)
-#define  DBPRIO      0x00004000U
+#define  DBPRIO(x)   ((x) << 14)
 #define  PIDX_MASK   0x00003fffU
 #define  PIDX_SHIFT  0
 #define  PIDX(x)     ((x) << PIDX_SHIFT)
@@ -193,6 +193,12 @@
 #define SGE_FL_BUFFER_SIZE1 0x1048
 #define SGE_FL_BUFFER_SIZE2 0x104c
 #define SGE_FL_BUFFER_SIZE3 0x1050
+#define SGE_FL_BUFFER_SIZE4 0x1054
+#define SGE_FL_BUFFER_SIZE5 0x1058
+#define SGE_FL_BUFFER_SIZE6 0x105c
+#define SGE_FL_BUFFER_SIZE7 0x1060
+#define SGE_FL_BUFFER_SIZE8 0x1064
+
 #define SGE_INGRESS_RX_THRESHOLD 0x10a0
 #define  THRESHOLD_0_MASK   0x3f000000U
 #define  THRESHOLD_0_SHIFT  24
@@ -217,6 +223,17 @@
 #define  EGRTHRESHOLD(x)     ((x) << EGRTHRESHOLDshift)
 #define  EGRTHRESHOLD_GET(x) (((x) & EGRTHRESHOLD_MASK) >> EGRTHRESHOLDshift)
 
+#define SGE_DBFIFO_STATUS 0x10a4
+#define  HP_INT_THRESH_SHIFT 28
+#define  HP_INT_THRESH_MASK  0xfU
+#define  HP_INT_THRESH(x)    ((x) << HP_INT_THRESH_SHIFT)
+#define  LP_INT_THRESH_SHIFT 12
+#define  LP_INT_THRESH_MASK  0xfU
+#define  LP_INT_THRESH(x)    ((x) << LP_INT_THRESH_SHIFT)
+
+#define SGE_DOORBELL_CONTROL 0x10a8
+#define  ENABLE_DROP        (1 << 13)
+
 #define SGE_TIMER_VALUE_0_AND_1 0x10b8
 #define  TIMERVALUE0_MASK   0xffff0000U
 #define  TIMERVALUE0_SHIFT  16
@@ -277,6 +294,10 @@
 #define A_SGE_CTXT_CMD 0x11fc
 #define A_SGE_DBQ_CTXT_BADDR 0x1084
 
+#define PCIE_PF_CFG 0x40
+#define  AIVEC(x)	((x) << 4)
+#define  AIVEC_MASK	0x3ffU
+
 #define PCIE_PF_CLI 0x44
 #define PCIE_INT_CAUSE 0x3004
 #define  UNXSPLCPLERR  0x20000000U
@@ -322,6 +343,13 @@
 #define PCIE_MEM_ACCESS_OFFSET 0x306c
 
 #define PCIE_FW 0x30b8
+#define  PCIE_FW_ERR		0x80000000U
+#define  PCIE_FW_INIT		0x40000000U
+#define  PCIE_FW_HALT		0x20000000U
+#define  PCIE_FW_MASTER_VLD	0x00008000U
+#define  PCIE_FW_MASTER(x)	((x) << 12)
+#define  PCIE_FW_MASTER_MASK	0x7
+#define  PCIE_FW_MASTER_GET(x)	(((x) >> 12) & PCIE_FW_MASTER_MASK)
 
 #define PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS 0x5908
 #define  RNPP 0x80000000U
@@ -432,6 +460,9 @@
 #define  MBOWNER(x)     ((x) << MBOWNER_SHIFT)
 #define  MBOWNER_GET(x) (((x) & MBOWNER_MASK) >> MBOWNER_SHIFT)
 
+#define CIM_PF_HOST_INT_ENABLE 0x288
+#define  MBMSGRDYINTEN(x) ((x) << 19)
+
 #define CIM_PF_HOST_INT_CAUSE 0x28c
 #define  MBMSGRDYINT 0x00080000U
 
@@ -922,7 +953,7 @@
 
 #define SF_DATA 0x193f8
 #define SF_OP 0x193fc
-#define  BUSY          0x80000000U
+#define  SF_BUSY       0x80000000U
 #define  SF_LOCK       0x00000010U
 #define  SF_CONT       0x00000008U
 #define  BYTECNT_MASK  0x00000006U
@@ -981,6 +1012,7 @@
 #define  I2CM       0x00000002U
 #define  CIM        0x00000001U
 
+#define PL_INT_ENABLE 0x19410
 #define PL_INT_MAP0 0x19414
 #define PL_RST 0x19428
 #define  PIORST     0x00000002U
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index a636463..0abc864 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -68,6 +68,7 @@ struct fw_wr_hdr {
 };
 
 #define FW_WR_OP(x)	 ((x) << 24)
+#define FW_WR_OP_GET(x)	 (((x) >> 24) & 0xff)
 #define FW_WR_ATOMIC(x)	 ((x) << 23)
 #define FW_WR_FLUSH(x)   ((x) << 22)
 #define FW_WR_COMPL(x)   ((x) << 21)
@@ -222,6 +223,7 @@ struct fw_cmd_hdr {
 #define FW_CMD_OP(x)		((x) << 24)
 #define FW_CMD_OP_GET(x)        (((x) >> 24) & 0xff)
 #define FW_CMD_REQUEST          (1U << 23)
+#define FW_CMD_REQUEST_GET(x)   (((x) >> 23) & 0x1)
 #define FW_CMD_READ		(1U << 22)
 #define FW_CMD_WRITE		(1U << 21)
 #define FW_CMD_EXEC		(1U << 20)
@@ -229,6 +231,7 @@ struct fw_cmd_hdr {
 #define FW_CMD_RETVAL(x)	((x) << 8)
 #define FW_CMD_RETVAL_GET(x)	(((x) >> 8) & 0xff)
 #define FW_CMD_LEN16(x)         ((x) << 0)
+#define FW_LEN16(fw_struct)	FW_CMD_LEN16(sizeof(fw_struct) / 16)
 
 enum fw_ldst_addrspc {
 	FW_LDST_ADDRSPC_FIRMWARE  = 0x0001,
@@ -241,7 +244,8 @@ enum fw_ldst_addrspc {
 	FW_LDST_ADDRSPC_TP_MIB    = 0x0012,
 	FW_LDST_ADDRSPC_MDIO      = 0x0018,
 	FW_LDST_ADDRSPC_MPS       = 0x0020,
-	FW_LDST_ADDRSPC_FUNC      = 0x0028
+	FW_LDST_ADDRSPC_FUNC      = 0x0028,
+	FW_LDST_ADDRSPC_FUNC_PCIE = 0x0029,
 };
 
 enum fw_ldst_mps_fid {
@@ -303,6 +307,16 @@ struct fw_ldst_cmd {
 			__be64 data0;
 			__be64 data1;
 		} func;
+		struct fw_ldst_pcie {
+			u8 ctrl_to_fn;
+			u8 bnum;
+			u8 r;
+			u8 ext_r;
+			u8 select_naccess;
+			u8 pcie_fn;
+			__be16 nset_pkd;
+			__be32 data[12];
+		} pcie;
 	} u;
 };
 
@@ -312,6 +326,9 @@ struct fw_ldst_cmd {
 #define FW_LDST_CMD_FID(x)	((x) << 15)
 #define FW_LDST_CMD_CTL(x)	((x) << 0)
 #define FW_LDST_CMD_RPLCPF(x)	((x) << 0)
+#define FW_LDST_CMD_LC		(1U << 4)
+#define FW_LDST_CMD_NACCESS(x)	((x) << 0)
+#define FW_LDST_CMD_FN(x)	((x) << 0)
 
 struct fw_reset_cmd {
 	__be32 op_to_write;
@@ -333,7 +350,7 @@ enum fw_hellow_cmd {
 struct fw_hello_cmd {
 	__be32 op_to_write;
 	__be32 retval_len16;
-	__be32 err_to_mbasyncnot;
+	__be32 err_to_clearinit;
 #define FW_HELLO_CMD_ERR	    (1U << 31)
 #define FW_HELLO_CMD_INIT	    (1U << 30)
 #define FW_HELLO_CMD_MASTERDIS(x)   ((x) << 29)
@@ -343,6 +360,7 @@ struct fw_hello_cmd {
 #define FW_HELLO_CMD_MBMASTER(x)     ((x) << FW_HELLO_CMD_MBMASTER_SHIFT)
 #define FW_HELLO_CMD_MBMASTER_GET(x) \
 	(((x) >> FW_HELLO_CMD_MBMASTER_SHIFT) & FW_HELLO_CMD_MBMASTER_MASK)
+#define FW_HELLO_CMD_MBASYNCNOTINT(x)	((x) << 23)
 #define FW_HELLO_CMD_MBASYNCNOT(x)  ((x) << 20)
 #define FW_HELLO_CMD_STAGE(x)       ((x) << 17)
 #define FW_HELLO_CMD_CLEARINIT      (1U << 16)
@@ -428,6 +446,7 @@ enum fw_caps_config_iscsi {
 enum fw_caps_config_fcoe {
 	FW_CAPS_CONFIG_FCOE_INITIATOR	= 0x00000001,
 	FW_CAPS_CONFIG_FCOE_TARGET	= 0x00000002,
+	FW_CAPS_CONFIG_FCOE_CTRL_OFLD	= 0x00000004,
 };
 
 enum fw_memtype_cf {
@@ -440,7 +459,7 @@ enum fw_memtype_cf {
 
 struct fw_caps_config_cmd {
 	__be32 op_to_write;
-	__be32 retval_len16;
+	__be32 cfvalid_to_len16;
 	__be32 r2;
 	__be32 hwmbitmap;
 	__be16 nbmcaps;
@@ -701,8 +720,8 @@ struct fw_iq_cmd {
 #define FW_IQ_CMD_FL0FETCHRO(x) ((x) << 6)
 #define FW_IQ_CMD_FL0HOSTFCMODE(x) ((x) << 4)
 #define FW_IQ_CMD_FL0CPRIO(x) ((x) << 3)
-#define FW_IQ_CMD_FL0PADEN (1U << 2)
-#define FW_IQ_CMD_FL0PACKEN (1U << 1)
+#define FW_IQ_CMD_FL0PADEN(x) ((x) << 2)
+#define FW_IQ_CMD_FL0PACKEN(x) ((x) << 1)
 #define FW_IQ_CMD_FL0CONGEN (1U << 0)
 
 #define FW_IQ_CMD_FL0DCAEN(x) ((x) << 15)
@@ -1190,6 +1209,14 @@ enum fw_port_dcb_cfg_rc {
 	FW_PORT_DCB_CFG_ERROR	= 0x1
 };
 
+enum fw_port_dcb_type {
+	FW_PORT_DCB_TYPE_PGID		= 0x00,
+	FW_PORT_DCB_TYPE_PGRATE		= 0x01,
+	FW_PORT_DCB_TYPE_PRIORATE	= 0x02,
+	FW_PORT_DCB_TYPE_PFC		= 0x03,
+	FW_PORT_DCB_TYPE_APP_ID		= 0x04,
+};
+
 struct fw_port_cmd {
 	__be32 op_to_portid;
 	__be32 action_to_len16;
@@ -1257,6 +1284,7 @@ struct fw_port_cmd {
 #define FW_PORT_CMD_TXIPG(x) ((x) << 19)
 
 #define FW_PORT_CMD_LSTATUS (1U << 31)
+#define FW_PORT_CMD_LSTATUS_GET(x) (((x) >> 31) & 0x1)
 #define FW_PORT_CMD_LSPEED(x) ((x) << 24)
 #define FW_PORT_CMD_LSPEED_GET(x) (((x) >> 24) & 0x3f)
 #define FW_PORT_CMD_TXPAUSE (1U << 23)
@@ -1305,6 +1333,9 @@ enum fw_port_module_type {
 	FW_PORT_MOD_TYPE_TWINAX_PASSIVE,
 	FW_PORT_MOD_TYPE_TWINAX_ACTIVE,
 	FW_PORT_MOD_TYPE_LRM,
+	FW_PORT_MOD_TYPE_ERROR		= FW_PORT_CMD_MODTYPE_MASK - 3,
+	FW_PORT_MOD_TYPE_UNKNOWN	= FW_PORT_CMD_MODTYPE_MASK - 2,
+	FW_PORT_MOD_TYPE_NOTSUPPORTED	= FW_PORT_CMD_MODTYPE_MASK - 1,
 
 	FW_PORT_MOD_TYPE_NONE = FW_PORT_CMD_MODTYPE_MASK
 };
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index f16745f..92170d5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -536,7 +536,7 @@ static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl)
 	if (fl->pend_cred >= FL_PER_EQ_UNIT) {
 		wmb();
 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
-			     DBPRIO |
+			     DBPRIO(1) |
 			     QID(fl->cntxt_id) |
 			     PIDX(fl->pend_cred / FL_PER_EQ_UNIT));
 		fl->pend_cred %= FL_PER_EQ_UNIT;
@@ -952,7 +952,7 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
 	 * Warn if we write doorbells with the wrong priority and write
 	 * descriptors before telling HW.
 	 */
-	WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO);
+	WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO(1));
 	wmb();
 	t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
 		     QID(tq->cntxt_id) | PIDX(n));
@@ -2126,8 +2126,8 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
 		cmd.iqns_to_fl0congen =
 			cpu_to_be32(
 				FW_IQ_CMD_FL0HOSTFCMODE(SGE_HOSTFCMODE_NONE) |
-				FW_IQ_CMD_FL0PACKEN |
-				FW_IQ_CMD_FL0PADEN);
+				FW_IQ_CMD_FL0PACKEN(1) |
+				FW_IQ_CMD_FL0PADEN(1));
 		cmd.fl0dcaen_to_fl0cidxfthresh =
 			cpu_to_be16(
 				FW_IQ_CMD_FL0FBMIN(SGE_FETCHBURSTMIN_64B) |
-- 
1.7.1


^ permalink raw reply related

* [v6 PATCH 0/8] csiostor: Chelsio FCoE offload driver submission
From: Naresh Kumar Inna @ 2012-11-15 17:11 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan

This is the initial submission of the Chelsio FCoE offload driver (csiostor)
to the upstream kernel. This driver currently supports FCoE offload
functionality over Chelsio T4-based 10Gb Converged Network Adapters.

The following patches contain the driver sources for csiostor driver and
updates to firmware/hardware header files shared between csiostor,
cxgb4 (Chelsio T4-based NIC driver) and cxgb4vf (Chelsio T4-based Virtual
Function NIC driver). The csiostor driver is dependent on these
header updates. These patches have been generated against scsi 'misc' branch.

csiostor is a low level SCSI driver that interfaces with PCI, SCSI midlayer and
FC transport subsystems. This driver claims the FCoE PCIe function on
Chelsio Converged Network Adapters. It relies on firmware events for slow path
operations like discovery, thereby offloading session management. The driver
programs firmware via Work Request interfaces for fast path I/O offload
features.

v6 has changes to make csiostor work with cxgb4 header files post 3.6 merge, as
well as a few bug fixes in csiostor.

Here is the brief description of patches:
[v6 PATCH 1/8]: Updates to header files shared between cxgb4, cxgb4vf and
                csiostor.
[v6 PATCH 2/8]: Header files part 1.
[v6 PATCH 3/8]: Header files part 2.
[v6 PATCH 4/8]: Driver initialization and Work Request services.
[v6 PATCH 5/8]: FC transport interfaces and mailbox services.
[v6 PATCH 6/8]: Local and remote port state tracking functionality.
[v6 PATCH 7/8]: Interrupt handling and fast path I/O functionality.
[v6 PATCH 8/8]: Hardware interface, Makefile and Kconfig changes.

Naresh Kumar Inna (8):
  cxgb4/cxgb4vf: Chelsio FCoE offload driver submission (common header
    updates).
  csiostor: Chelsio FCoE offload driver submission (headers part 1).
  csiostor: Chelsio FCoE offload driver submission (headers part 2).
  csiostor: Chelsio FCoE offload driver submission (sources part 1).
  csiostor: Chelsio FCoE offload driver submission (sources part 2).
  csiostor: Chelsio FCoE offload driver submission (sources part 3).
  csiostor: Chelsio FCoE offload driver submission (sources part 4).
  csiostor: Chelsio FCoE offload driver submission (sources part 5).

 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |   10 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c        |    6 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c      |   20 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h     |    1 +
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h    |   36 +-
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h   |   41 +-
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c      |    8 +-
 drivers/scsi/Kconfig                            |    1 +
 drivers/scsi/Makefile                           |    1 +
 drivers/scsi/csiostor/Kconfig                   |   19 +
 drivers/scsi/csiostor/Makefile                  |   11 +
 drivers/scsi/csiostor/csio_attr.c               |  796 ++++
 drivers/scsi/csiostor/csio_defs.h               |  121 +
 drivers/scsi/csiostor/csio_hw.c                 | 4395 +++++++++++++++++++++++
 drivers/scsi/csiostor/csio_hw.h                 |  667 ++++
 drivers/scsi/csiostor/csio_init.c               | 1274 +++++++
 drivers/scsi/csiostor/csio_init.h               |  158 +
 drivers/scsi/csiostor/csio_isr.c                |  624 ++++
 drivers/scsi/csiostor/csio_lnode.c              | 2133 +++++++++++
 drivers/scsi/csiostor/csio_lnode.h              |  255 ++
 drivers/scsi/csiostor/csio_mb.c                 | 1770 +++++++++
 drivers/scsi/csiostor/csio_mb.h                 |  278 ++
 drivers/scsi/csiostor/csio_rnode.c              |  912 +++++
 drivers/scsi/csiostor/csio_rnode.h              |  141 +
 drivers/scsi/csiostor/csio_scsi.c               | 2555 +++++++++++++
 drivers/scsi/csiostor/csio_scsi.h               |  342 ++
 drivers/scsi/csiostor/csio_wr.c                 | 1632 +++++++++
 drivers/scsi/csiostor/csio_wr.h                 |  512 +++
 drivers/scsi/csiostor/t4fw_api_stor.h           |  578 +++
 29 files changed, 19268 insertions(+), 29 deletions(-)
 create mode 100644 drivers/scsi/csiostor/Kconfig
 create mode 100644 drivers/scsi/csiostor/Makefile
 create mode 100644 drivers/scsi/csiostor/csio_attr.c
 create mode 100644 drivers/scsi/csiostor/csio_defs.h
 create mode 100644 drivers/scsi/csiostor/csio_hw.c
 create mode 100644 drivers/scsi/csiostor/csio_hw.h
 create mode 100644 drivers/scsi/csiostor/csio_init.c
 create mode 100644 drivers/scsi/csiostor/csio_init.h
 create mode 100644 drivers/scsi/csiostor/csio_isr.c
 create mode 100644 drivers/scsi/csiostor/csio_lnode.c
 create mode 100644 drivers/scsi/csiostor/csio_lnode.h
 create mode 100644 drivers/scsi/csiostor/csio_mb.c
 create mode 100644 drivers/scsi/csiostor/csio_mb.h
 create mode 100644 drivers/scsi/csiostor/csio_rnode.c
 create mode 100644 drivers/scsi/csiostor/csio_rnode.h
 create mode 100644 drivers/scsi/csiostor/csio_scsi.c
 create mode 100644 drivers/scsi/csiostor/csio_scsi.h
 create mode 100644 drivers/scsi/csiostor/csio_wr.c
 create mode 100644 drivers/scsi/csiostor/csio_wr.h
 create mode 100644 drivers/scsi/csiostor/t4fw_api_stor.h


^ permalink raw reply

* Re: [PATCH net-next] tunnel: use the correct endian for some fields
From: Stephen Hemminger @ 2012-11-15 17:08 UTC (permalink / raw)
  To: Cong Wang; +Cc: nicolas.dichtel, netdev, David S. Miller
In-Reply-To: <1352970275.31884.37.camel@cr0>

On Thu, 15 Nov 2012 17:04:35 +0800
Cong Wang <amwang@redhat.com> wrote:

> On Thu, 2012-11-15 at 09:57 +0100, Nicolas Dichtel wrote:
> > Le 15/11/2012 09:51, Cong Wang a écrit :
> > > Fengguang reported:
> > >
> > > net/ipv6/ip6_tunnel.c:1571:33: sparse: incorrect type in assignment (different base types)
> > > net/ipv6/ip6_tunnel.c:1571:33:    expected restricted __be32 [usertype] flowinfo
> > > net/ipv6/ip6_tunnel.c:1571:33:    got unsigned int
> > >
> > > for these fields, we need to use the correct endian wrapers.
> > >
> > > Reported-by: Fengguang Wu <fengguang.wu@intel.com>
> > > Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> > > Cc: David S. Miller <davem@davemloft.net>
> > > Signed-off-by: Cong Wang <amwang@redhat.com>
> > Just one minute before my patch.
> > Your patch does not fix all warnings (i_flags & SIT_ISATAP in ip6_tunnel.c).
> 
> Yeah, then ignore this patch. :)

i_flags is a mess right now, it mixed host and bigendian values in the
same field. Also SIT and VTI flags overlap.

^ permalink raw reply

* Re: [PATCH 3.7.0-rc4] of/net/mdio-gpio: Fix pdev->id issue when using devicetrees.
From: Grant Likely @ 2012-11-15 16:59 UTC (permalink / raw)
  To: Srinivas KANDAGATLA, netdev; +Cc: devicetree-discuss, davem
In-Reply-To: <1352816773-17837-1-git-send-email-srinivas.kandagatla@st.com>

On Tue, 13 Nov 2012 14:26:13 +0000, Srinivas KANDAGATLA <srinivas.kandagatla@st.com> wrote:
> From: Srinivas Kandagatla <srinivas.kandagatla@st.com>
> 
> When the mdio-gpio driver is probed via device trees, the platform
> device id is set as -1, However the id is re-used in the code while
> creating an mdio bus.
> So, setting up the id via aliases from device tree is a sensible
> solution to fix this issue.
> 
> Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
> ---
>  .../devicetree/bindings/net/mdio-gpio.txt          |    9 ++++++++-
>  drivers/net/phy/mdio-gpio.c                        |    1 +
>  2 files changed, 9 insertions(+), 1 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.txt b/Documentation/devicetree/bindings/net/mdio-gpio.txt
> index bc95495..c79bab0 100644
> --- a/Documentation/devicetree/bindings/net/mdio-gpio.txt
> +++ b/Documentation/devicetree/bindings/net/mdio-gpio.txt
> @@ -8,9 +8,16 @@ gpios property as described in section VIII.1 in the following order:
>  
>  MDC, MDIO.
>  
> +Note: Each gpio-mdio bus should have an alias correctly numbered in "aliases"
> +node.
> +
>  Example:
>  
> -mdio {
> +aliases {
> +	mdio-gpio0 = <&mdio0>;
> +};
> +
> +mdio0: mdio {
>  	compatible = "virtual,mdio-gpio";
>  	#address-cells = <1>;
>  	#size-cells = <0>;
> diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
> index 899274f..e3f3115 100644
> --- a/drivers/net/phy/mdio-gpio.c
> +++ b/drivers/net/phy/mdio-gpio.c
> @@ -56,6 +56,7 @@ static void *mdio_gpio_of_get_data(struct platform_device *pdev)
>  	if (ret < 0)
>  		return NULL;
>  	pdata->mdio = ret;
> +	pdev->id = of_alias_get_id(np, "mdio-gpio");

This is actually illegal. Once a device is registered, pdev->id must not
be changed. Same goes for pdev->name and pdev->dev.platform_data.

g.

^ permalink raw reply

* [PATCH v2 net-next 17/22] bnx2x: Support of PF driver of a VF q_teardown request
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

The 'q_teardown' request is basically the opposite of the 'q_setup'.
Here the PF driver removes from the device the queue it opened against
the VF fastpath ring at 'setup_q' stage, along with all related
rx_mode info.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  268 +++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |    5 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |   20 ++
 3 files changed, 293 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 82a9654..146ad65 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -109,6 +109,13 @@ enum bnx2x_vfop_qctor_state {
 	   BNX2X_VFOP_QCTOR_INT_EN
 };
 
+enum bnx2x_vfop_qdtor_state {
+	   BNX2X_VFOP_QDTOR_HALT,
+	   BNX2X_VFOP_QDTOR_TERMINATE,
+	   BNX2X_VFOP_QDTOR_CFCDEL,
+	   BNX2X_VFOP_QDTOR_DONE
+};
+
 enum bnx2x_vfop_vlan_mac_state {
 	   BNX2X_VFOP_VLAN_MAC_CONFIG_SINGLE,
 	   BNX2X_VFOP_VLAN_MAC_CLEAR,
@@ -135,6 +142,14 @@ enum bnx2x_vfop_rxmode_state {
 	   BNX2X_VFOP_RXMODE_DONE
 };
 
+enum bnx2x_vfop_qteardown_state {
+	   BNX2X_VFOP_QTEARDOWN_RXMODE,
+	   BNX2X_VFOP_QTEARDOWN_CLR_VLAN,
+	   BNX2X_VFOP_QTEARDOWN_CLR_MAC,
+	   BNX2X_VFOP_QTEARDOWN_QDTOR,
+	   BNX2X_VFOP_QTEARDOWN_DONE
+};
+
 #define bnx2x_vfop_reset_wq(vf)	atomic_set(&vf->op_in_progress, 0)
 
 void bnx2x_vfop_qctor_dump_tx(struct bnx2x *bp, struct bnx2x_virtf *vf,
@@ -341,6 +356,102 @@ static int bnx2x_vfop_qctor_cmd(struct bnx2x *bp,
 	return -ENOMEM;
 }
 
+/* VFOP queue destruction */
+static void bnx2x_vfop_qdtor(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	struct bnx2x_vfop_args_qdtor *qdtor = &vfop->args.qdtor;
+	struct bnx2x_queue_state_params *q_params = &vfop->op_p->qctor.qstate;
+	enum bnx2x_vfop_qdtor_state state = vfop->state;
+
+	bnx2x_vfop_reset_wq(vf);
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "vf[%d] STATE: %d\n", vf->abs_vfid, state);
+
+	switch (state) {
+	case BNX2X_VFOP_QDTOR_HALT:
+
+		/* has this queue already been stopped? */
+		if (bnx2x_get_q_logical_state(bp, q_params->q_obj) ==
+		    BNX2X_Q_LOGICAL_STATE_STOPPED) {
+			DP(BNX2X_MSG_IOV,
+			   "Entered qdtor but queue was already stopped. Aborting gracefully\n");
+			goto op_done;
+		}
+
+		/* next state */
+		vfop->state = BNX2X_VFOP_QDTOR_TERMINATE;
+
+		q_params->cmd = BNX2X_Q_CMD_HALT;
+		vfop->rc = bnx2x_queue_state_change(bp, q_params);
+
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_CONT);
+
+	case BNX2X_VFOP_QDTOR_TERMINATE:
+		/* next state */
+		vfop->state = BNX2X_VFOP_QDTOR_CFCDEL;
+
+		q_params->cmd = BNX2X_Q_CMD_TERMINATE;
+		vfop->rc = bnx2x_queue_state_change(bp, q_params);
+
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_CONT);
+
+	case BNX2X_VFOP_QDTOR_CFCDEL:
+		/* next state */
+		vfop->state = BNX2X_VFOP_QDTOR_DONE;
+
+		q_params->cmd = BNX2X_Q_CMD_CFC_DEL;
+		vfop->rc = bnx2x_queue_state_change(bp, q_params);
+
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE);
+op_err:
+	BNX2X_ERR("QDTOR[%d:%d] error: cmd %d, rc %d\n",
+		  vf->abs_vfid, qdtor->qid, q_params->cmd, vfop->rc);
+op_done:
+	case BNX2X_VFOP_QDTOR_DONE:
+		/* invalidate the context */
+		qdtor->cxt->ustorm_ag_context.cdu_usage = 0;
+		qdtor->cxt->xstorm_ag_context.cdu_reserved = 0;
+		bnx2x_vfop_end(bp, vf, vfop);
+		return;
+	default:
+		bnx2x_vfop_default(state);
+	}
+op_pending:
+	return;
+}
+
+static int bnx2x_vfop_qdtor_cmd(struct bnx2x *bp,
+				struct bnx2x_virtf *vf,
+				struct bnx2x_vfop_cmd *cmd,
+				int qid)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+
+	if (vfop) {
+		struct bnx2x_queue_state_params *qstate =
+			&vf->op_params.qctor.qstate;
+
+		memset(qstate, 0, sizeof(*qstate));
+		qstate->q_obj = &bnx2x_vfq(vf, qid, sp_obj);
+
+		vfop->args.qdtor.qid = qid;
+		vfop->args.qdtor.cxt = bnx2x_vfq(vf, qid, cxt);
+
+		bnx2x_vfop_opset(BNX2X_VFOP_QDTOR_HALT,
+				 bnx2x_vfop_qdtor, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_qdtor,
+					     cmd->block);
+	}
+	DP(BNX2X_MSG_IOV, "VF[%d] failed to add a vfop. rc %d\n",
+	   vf->abs_vfid, vfop->rc);
+	return -ENOMEM;
+}
+
 static void __devinit
 bnx2x_vf_set_igu_info(struct bnx2x *bp, u8 igu_sb_id, u8 abs_vfid)
 {
@@ -592,6 +703,44 @@ bnx2x_vfop_mac_prep_ramrod(struct bnx2x_vlan_mac_ramrod_params *ramrod,
 	set_bit(BNX2X_ETH_MAC, &ramrod->user_req.vlan_mac_flags);
 }
 
+static int bnx2x_vfop_mac_delall_cmd(struct bnx2x *bp,
+				     struct bnx2x_virtf *vf,
+				     struct bnx2x_vfop_cmd *cmd,
+				     int qid, bool drv_only)
+{
+
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+	if (vfop) {
+		struct bnx2x_vfop_args_filters filters = {
+			.multi_filter = NULL,	/* single */
+			.credit = NULL,		/* consume credit */
+		};
+		struct bnx2x_vfop_vlan_mac_flags flags = {
+			.drv_only = drv_only,
+			.dont_consume = (filters.credit != NULL),
+			.single_cmd = true,
+			.add = false /* don't care */,
+		};
+		struct bnx2x_vlan_mac_ramrod_params *ramrod =
+			&vf->op_params.vlan_mac;
+
+		/* set ramrod params */
+		bnx2x_vfop_mac_prep_ramrod(ramrod, &flags);
+
+		/* set object */
+		ramrod->vlan_mac_obj = &bnx2x_vfq(vf, qid, mac_obj);
+
+		/* set extra args */
+		vfop->args.filters = filters;
+
+		bnx2x_vfop_opset(BNX2X_VFOP_VLAN_MAC_CLEAR,
+				 bnx2x_vfop_vlan_mac, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_vlan_mac,
+					     cmd->block);
+	}
+	return -ENOMEM;
+}
 int bnx2x_vfop_mac_list_cmd(struct bnx2x *bp,
 			    struct bnx2x_virtf *vf,
 			    struct bnx2x_vfop_cmd *cmd,
@@ -674,6 +823,44 @@ int bnx2x_vfop_vlan_set_cmd(struct bnx2x *bp,
 	return -ENOMEM;
 }
 
+static int bnx2x_vfop_vlan_delall_cmd(struct bnx2x *bp,
+			       struct bnx2x_virtf *vf,
+			       struct bnx2x_vfop_cmd *cmd,
+			       int qid, bool drv_only)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+	if (vfop) {
+		struct bnx2x_vfop_args_filters filters = {
+			.multi_filter = NULL, /* single command */
+			.credit = &bnx2x_vfq(vf, qid, vlan_count),
+		};
+		struct bnx2x_vfop_vlan_mac_flags flags = {
+			.drv_only = drv_only,
+			.dont_consume = (filters.credit != NULL),
+			.single_cmd = true,
+			.add = false, /* don't care */
+		};
+		struct bnx2x_vlan_mac_ramrod_params *ramrod =
+			&vf->op_params.vlan_mac;
+
+		/* set ramrod params */
+		bnx2x_vfop_vlan_mac_prep_ramrod(ramrod, &flags);
+
+		/* set object */
+		ramrod->vlan_mac_obj = &bnx2x_vfq(vf, qid, vlan_obj);
+
+		/* set extra args */
+		vfop->args.filters = filters;
+
+		bnx2x_vfop_opset(BNX2X_VFOP_VLAN_MAC_CLEAR,
+				 bnx2x_vfop_vlan_mac, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_vlan_mac,
+					     cmd->block);
+	}
+	return -ENOMEM;
+}
+
 int bnx2x_vfop_vlan_list_cmd(struct bnx2x *bp,
 			     struct bnx2x_virtf *vf,
 			     struct bnx2x_vfop_cmd *cmd,
@@ -958,6 +1145,87 @@ int bnx2x_vfop_rxmode_cmd(struct bnx2x *bp,
 
 }
 
+/* VFOP queue tear-down ('drop all' rx-mode, clear vlans, clear macs,
+ * queue destructor)
+ */
+static void bnx2x_vfop_qdown(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	int qid = vfop->args.qx.qid;
+	enum bnx2x_vfop_qteardown_state state = vfop->state;
+	struct bnx2x_vfop_cmd cmd;
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "vf[%d] STATE: %d\n", vf->abs_vfid, state);
+
+	cmd.done = bnx2x_vfop_qdown;
+	cmd.block = false;
+
+	switch (state) {
+	case BNX2X_VFOP_QTEARDOWN_RXMODE:
+		/* Drop all */
+		vfop->state = BNX2X_VFOP_QTEARDOWN_CLR_VLAN;
+		vfop->rc = bnx2x_vfop_rxmode_cmd(bp, vf, &cmd, qid, 0);
+		if (vfop->rc)
+			goto op_err;
+		return;
+
+	case BNX2X_VFOP_QTEARDOWN_CLR_VLAN:
+		/* vlan-clear-all: don't consume credit */
+		vfop->state = BNX2X_VFOP_QTEARDOWN_CLR_MAC;
+		vfop->rc = bnx2x_vfop_vlan_delall_cmd(bp, vf, &cmd, qid, false);
+		if (vfop->rc)
+			goto op_err;
+		return;
+
+	case BNX2X_VFOP_QTEARDOWN_CLR_MAC:
+		/* mac-clear-all: consume credit */
+		vfop->state = BNX2X_VFOP_QTEARDOWN_QDTOR;
+		vfop->rc = bnx2x_vfop_mac_delall_cmd(bp, vf, &cmd, qid, false);
+		if (vfop->rc)
+			goto op_err;
+		return;
+
+	case BNX2X_VFOP_QTEARDOWN_QDTOR:
+		/* run the queue destruction flow */
+		DP(BNX2X_MSG_IOV, "case: BNX2X_VFOP_QTEARDOWN_QDTOR\n");
+		vfop->state = BNX2X_VFOP_QTEARDOWN_DONE;
+		DP(BNX2X_MSG_IOV, "new state: BNX2X_VFOP_QTEARDOWN_DONE\n");
+		vfop->rc = bnx2x_vfop_qdtor_cmd(bp, vf, &cmd, qid);
+		DP(BNX2X_MSG_IOV, "returned from cmd");
+		if (vfop->rc)
+			goto op_err;
+		return;
+op_err:
+	BNX2X_ERR("QTEARDOWN[%d:%d] error: rc %d\n",
+		  vf->abs_vfid, qid, vfop->rc);
+
+	case BNX2X_VFOP_QTEARDOWN_DONE:
+		bnx2x_vfop_end(bp, vf, vfop);
+		return;
+	default:
+		bnx2x_vfop_default(state);
+	}
+}
+
+int bnx2x_vfop_qdown_cmd(struct bnx2x *bp,
+			 struct bnx2x_virtf *vf,
+			 struct bnx2x_vfop_cmd *cmd,
+			 int qid)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+	if (vfop) {
+		vfop->args.qx.qid = qid;
+		bnx2x_vfop_opset(BNX2X_VFOP_QTEARDOWN_RXMODE,
+				 bnx2x_vfop_qdown, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_qdown,
+					     cmd->block);
+	}
+	return -ENOMEM;
+}
 /* VF enable primitives
  *
  * when pretend is required the caller is responsible
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index a18cef6..4a79741 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -645,6 +645,11 @@ int bnx2x_vfop_qsetup_cmd(struct bnx2x *bp,
 			  struct bnx2x_vfop_cmd *cmd,
 			  int qid);
 
+int bnx2x_vfop_qdown_cmd(struct bnx2x *bp,
+			 struct bnx2x_virtf *vf,
+			 struct bnx2x_vfop_cmd *cmd,
+			 int qid);
+
 int bnx2x_vfop_mcast_cmd(struct bnx2x *bp,
 			 struct bnx2x_virtf *vf,
 			 struct bnx2x_vfop_cmd *cmd,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 8dddb02..832aa31 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -794,6 +794,23 @@ static void bnx2x_vf_mbx_set_q_filters(struct bnx2x *bp,
 response:
 	bnx2x_vf_mbx_resp(bp, vf);
 }
+static void bnx2x_vf_mbx_teardown_q(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				    struct bnx2x_vf_mbx *mbx)
+{
+	int qid = mbx->msg->req.q_op.vf_qid;
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vf_mbx_resp,
+		.block = false,
+	};
+
+	DP(BNX2X_MSG_IOV, "VF[%d] Q_TEARDOWN: vf_qid=%d\n",
+	   vf->abs_vfid, qid);
+
+	vf->op_rc = bnx2x_vfop_qdown_cmd(bp, vf, &cmd, qid);
+	if (vf->op_rc)
+		bnx2x_vf_mbx_resp(bp, vf);
+}
+
 /* dispatch request */
 static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				  struct bnx2x_vf_mbx *mbx)
@@ -822,6 +839,9 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		case CHANNEL_TLV_SET_Q_FILTERS:
 			bnx2x_vf_mbx_set_q_filters(bp, vf, mbx);
 			break;
+		case CHANNEL_TLV_TEARDOWN_Q:
+			bnx2x_vf_mbx_teardown_q(bp, vf, mbx);
+			break;
 		}
 
 	/* unknown TLV - this may belong to a VF driver from the future - a
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 20/22] bnx2x: Support VF FLR
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

The FLR indication arrives as an attention from the management processor.
Upon VF flr all FLRed function in the indication have already been
released by Firmware and now we basically need to free the resources
allocated to those VFs, and clean any remainders from the device
(FLR final cleanup).

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |    6 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |   19 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h   |    1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  295 ++++++++++++++++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |    7 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h  |    1 +
 6 files changed, 321 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 88dbf02..dfae9b0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1880,7 +1880,13 @@ void bnx2x_prep_dmae_with_comp(struct bnx2x *bp, struct dmae_command *dmae,
 int bnx2x_issue_dmae_with_comp(struct bnx2x *bp, struct dmae_command *dmae);
 void bnx2x_dp_dmae(struct bnx2x *bp, struct dmae_command *dmae, int msglvl);
 
+/* FLR related routines */
+u32 bnx2x_flr_clnup_poll_count(struct bnx2x *bp);
+void bnx2x_tx_hw_flushed(struct bnx2x *bp, u32 poll_count);
+int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func, u32 poll_cnt);
 u8 bnx2x_is_pcie_pending(struct pci_dev *dev);
+int bnx2x_flr_clnup_poll_hw_counter(struct bnx2x *bp, u32 reg,
+				    char *msg, u32 poll_cnt);
 
 void bnx2x_calc_fc_adv(struct bnx2x *bp);
 int bnx2x_sp_post(struct bnx2x *bp, int command, int cid,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 4fa3e74..42e572e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -1096,8 +1096,8 @@ static u32 bnx2x_flr_clnup_reg_poll(struct bnx2x *bp, u32 reg,
 	return val;
 }
 
-static int bnx2x_flr_clnup_poll_hw_counter(struct bnx2x *bp, u32 reg,
-					   char *msg, u32 poll_cnt)
+int bnx2x_flr_clnup_poll_hw_counter(struct bnx2x *bp, u32 reg,
+				    char *msg, u32 poll_cnt)
 {
 	u32 val = bnx2x_flr_clnup_reg_poll(bp, reg, 0, poll_cnt);
 	if (val != 0) {
@@ -1107,7 +1107,8 @@ static int bnx2x_flr_clnup_poll_hw_counter(struct bnx2x *bp, u32 reg,
 	return 0;
 }
 
-static u32 bnx2x_flr_clnup_poll_count(struct bnx2x *bp)
+/* Common routines with VF FLR cleanup */
+u32 bnx2x_flr_clnup_poll_count(struct bnx2x *bp)
 {
 	/* adjust polling timeout */
 	if (CHIP_REV_IS_EMUL(bp))
@@ -1119,7 +1120,7 @@ static u32 bnx2x_flr_clnup_poll_count(struct bnx2x *bp)
 	return FLR_POLL_CNT;
 }
 
-static void bnx2x_tx_hw_flushed(struct bnx2x *bp, u32 poll_count)
+void bnx2x_tx_hw_flushed(struct bnx2x *bp, u32 poll_count)
 {
 	struct pbf_pN_cmd_regs cmd_regs[] = {
 		{0, (CHIP_IS_E3B0(bp)) ?
@@ -1194,8 +1195,7 @@ static void bnx2x_tx_hw_flushed(struct bnx2x *bp, u32 poll_count)
 	(((index) << SDM_OP_GEN_AGG_VECT_IDX_SHIFT) & SDM_OP_GEN_AGG_VECT_IDX)
 
 
-static int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func,
-					 u32 poll_cnt)
+int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func, u32 poll_cnt)
 {
 	struct sdm_op_gen op_gen = {0};
 
@@ -1220,7 +1220,8 @@ static int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func,
 		BNX2X_ERR("FW final cleanup did not succeed\n");
 		DP(BNX2X_MSG_SP, "At timeout completion address contained %x\n",
 		   (REG_RD(bp, comp_addr)));
-		ret = 1;
+		bnx2x_panic();
+		return 1;
 	}
 	/* Zero completion for nxt FLR */
 	REG_WR(bp, comp_addr, 0);
@@ -3884,6 +3885,10 @@ static void bnx2x_attn_int_deasserted3(struct bnx2x *bp, u32 attn)
 
 			if (val & DRV_STATUS_DRV_INFO_REQ)
 				bnx2x_handle_drv_info_req(bp);
+
+			if (val & DRV_STATUS_VF_DISABLED)
+				bnx2x_vf_handle_flr_event(bp);
+
 			if ((bp->port.pmf == 0) && (val & DRV_STATUS_PMF))
 				bnx2x_pmf_update(bp);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 3997f63..823d1b6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -876,6 +876,7 @@
 #define HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0			 (0x1<<2)
 #define HC_CONFIG_0_REG_SINGLE_ISR_EN_0				 (0x1<<1)
 #define HC_CONFIG_1_REG_BLOCK_DISABLE_1				 (0x1<<0)
+#define DORQ_REG_VF_USAGE_CNT					 0x170320
 #define HC_REG_AGG_INT_0					 0x108050
 #define HC_REG_AGG_INT_1					 0x108054
 #define HC_REG_ATTN_BIT 					 0x108120
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 3baa26d..104978a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -136,6 +136,17 @@ enum bnx2x_vfop_mcast_state {
 	   BNX2X_VFOP_MCAST_ADD,
 	   BNX2X_VFOP_MCAST_CHK_DONE
 };
+enum bnx2x_vfop_qflr_state {
+	   BNX2X_VFOP_QFLR_CLR_VLAN,
+	   BNX2X_VFOP_QFLR_CLR_MAC,
+	   BNX2X_VFOP_QFLR_TERMINATE,
+	   BNX2X_VFOP_QFLR_DONE
+};
+
+enum bnx2x_vfop_flr_state {
+	   BNX2X_VFOP_FLR_QUEUES,
+	   BNX2X_VFOP_FLR_HW
+};
 
 enum bnx2x_vfop_close_state {
 	   BNX2X_VFOP_CLOSE_QUEUES,
@@ -973,6 +984,93 @@ int bnx2x_vfop_qsetup_cmd(struct bnx2x *bp,
 	return -ENOMEM;
 }
 
+/* VFOP queue FLR handling (clear vlans, clear macs, queue destructor) */
+static void bnx2x_vfop_qflr(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	int qid = vfop->args.qx.qid;
+	enum bnx2x_vfop_qflr_state state = vfop->state;
+	struct bnx2x_queue_state_params *qstate;
+	struct bnx2x_vfop_cmd cmd;
+
+	bnx2x_vfop_reset_wq(vf);
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "VF[%d] STATE: %d\n", vf->abs_vfid, state);
+
+	cmd.done = bnx2x_vfop_qflr;
+	cmd.block = false;
+
+	switch (state) {
+	case BNX2X_VFOP_QFLR_CLR_VLAN:
+		/* vlan-clear-all: driver-only, don't consume credit */
+		vfop->state = BNX2X_VFOP_QFLR_CLR_MAC;
+		vfop->rc = bnx2x_vfop_vlan_delall_cmd(bp, vf, &cmd, qid, true);
+		if (vfop->rc)
+			goto op_err;
+		return;
+
+	case BNX2X_VFOP_QFLR_CLR_MAC:
+		/* mac-clear-all: driver only consume credit */
+		vfop->state = BNX2X_VFOP_QFLR_TERMINATE;
+		vfop->rc = bnx2x_vfop_mac_delall_cmd(bp, vf, &cmd, qid, true);
+		DP(BNX2X_MSG_IOV,
+		   "VF[%d] vfop->rc after bnx2x_vfop_mac_delall_cmd was %d",
+		   vf->abs_vfid, vfop->rc);
+		if (vfop->rc)
+			goto op_err;
+		return;
+
+	case BNX2X_VFOP_QFLR_TERMINATE:
+		qstate = &vfop->op_p->qctor.qstate;
+		memset(qstate , 0, sizeof(*qstate));
+		qstate->q_obj = &bnx2x_vfq(vf, qid, sp_obj);
+		vfop->state = BNX2X_VFOP_QFLR_DONE;
+
+		DP(BNX2X_MSG_IOV, "VF[%d] qstate during flr was %d",
+		   vf->abs_vfid, qstate->q_obj->state);
+
+		if (qstate->q_obj->state != BNX2X_Q_STATE_RESET) {
+			qstate->q_obj->state = BNX2X_Q_STATE_STOPPED;
+			qstate->cmd = BNX2X_Q_CMD_TERMINATE;
+			vfop->rc = bnx2x_queue_state_change(bp, qstate);
+			bnx2x_vfop_finalize(vf, vfop->rc, VFOP_VERIFY_PEND);
+		} else {
+			goto op_done;
+		}
+
+op_err:
+	BNX2X_ERR("QFLR[%d:%d] error: rc %d\n",
+		  vf->abs_vfid, qid, vfop->rc);
+op_done:
+	case BNX2X_VFOP_QFLR_DONE:
+		bnx2x_vfop_end(bp, vf, vfop);
+		return;
+	default:
+		bnx2x_vfop_default(state);
+	}
+op_pending:
+	return;
+}
+
+static int bnx2x_vfop_qflr_cmd(struct bnx2x *bp,
+			       struct bnx2x_virtf *vf,
+			       struct bnx2x_vfop_cmd *cmd,
+			       int qid)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+	if (vfop) {
+		vfop->args.qx.qid = qid;
+		bnx2x_vfop_opset(BNX2X_VFOP_QFLR_CLR_VLAN,
+				 bnx2x_vfop_qflr, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_qflr,
+					     cmd->block);
+	}
+	return -ENOMEM;
+}
 
 /* VFOP multi-casts */
 static void bnx2x_vfop_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf)
@@ -1431,8 +1529,203 @@ static void bnx2x_vf_free_resc(struct bnx2x *bp, struct bnx2x_virtf *vf)
 	vf->state = VF_FREE;
 }
 
-/* called by bnx2x_init_hw_func, returns the next ilt line */
+static void bnx2x_vf_flr_clnup_hw(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	u32 poll_cnt = bnx2x_flr_clnup_poll_count(bp);
+
+	/* DQ usage counter */
+	bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, vf->abs_vfid));
+	bnx2x_flr_clnup_poll_hw_counter(bp, DORQ_REG_VF_USAGE_CNT,
+					"DQ VF usage counter timed out",
+					poll_cnt);
+	bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
+
+	/* FW cleanup command - poll for the results */
+	if (bnx2x_send_final_clnup(bp, (u8)FW_VF_HANDLE(vf->abs_vfid),
+				   poll_cnt))
+		BNX2X_ERR("VF[%d] Final cleanup timed-out\n", vf->abs_vfid);
+
+	/* ATC cleanup */
+
+	/* verify TX hw is flushed */
+	bnx2x_tx_hw_flushed(bp, poll_cnt);
+
+}
+
+static void bnx2x_vfop_flr(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	struct bnx2x_vfop_args_qx *qx = &vfop->args.qx;
+	enum bnx2x_vfop_flr_state state = vfop->state;
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vfop_flr,
+		.block = false,
+	};
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "vf[%d] STATE: %d\n", vf->abs_vfid, state);
 
+	switch (state) {
+	case BNX2X_VFOP_FLR_QUEUES:
+		/* the cleanup operations are valid if and only if the VF
+		 * was first acquired.
+		 */
+		if (++(qx->qid) < vf_rxq_count(vf)) {
+			vfop->rc = bnx2x_vfop_qflr_cmd(bp, vf, &cmd,
+						       qx->qid);
+			if (vfop->rc)
+				goto op_err;
+			return;
+		}
+		/* remove multicasts */
+		vfop->state = BNX2X_VFOP_FLR_HW;
+		vfop->rc = bnx2x_vfop_mcast_cmd(bp, vf, &cmd, NULL,
+						0, true);
+		if (vfop->rc)
+			goto op_err;
+		return;
+	case BNX2X_VFOP_FLR_HW:
+
+		/* dispatch final cleanup and wait for HW queues to flush */
+		bnx2x_vf_flr_clnup_hw(bp, vf);
+
+		/* release VF resources */
+		bnx2x_vf_free_resc(bp, vf);
+
+		/* re-open the mailbox */
+		bnx2x_vf_enable_mbx(bp, vf->abs_vfid);
+
+		goto op_done;
+	default:
+		bnx2x_vfop_default(state);
+	}
+op_err:
+	BNX2X_ERR("VF[%d] FLR error: rc %d\n", vf->abs_vfid, vfop->rc);
+op_done:
+	vf->flr_clnup_stage = VF_FLR_ACK;
+	bnx2x_vfop_end(bp, vf, vfop);
+	bnx2x_unlock_vf_pf_channel(bp, vf, CHANNEL_TLV_FLR);
+}
+
+static int bnx2x_vfop_flr_cmd(struct bnx2x *bp,
+			      struct bnx2x_virtf *vf,
+			      vfop_handler_t done)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+	if (vfop) {
+		vfop->args.qx.qid = -1; /* loop */
+		bnx2x_vfop_opset(BNX2X_VFOP_FLR_QUEUES,
+				 bnx2x_vfop_flr, done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_flr, false);
+	}
+	return -ENOMEM;
+}
+
+static void bnx2x_vf_flr_clnup(struct bnx2x *bp, struct bnx2x_virtf *prev_vf)
+{
+	int i = prev_vf ? prev_vf->index + 1 : 0;
+	struct bnx2x_virtf *vf;
+
+	/* find next VF to cleanup */
+next_vf_to_clean:
+	for (;
+	     i < BNX2X_NR_VIRTFN(bp) &&
+	     (bnx2x_vf(bp, i, state) != VF_RESET ||
+	      bnx2x_vf(bp, i, flr_clnup_stage) != VF_FLR_CLN);
+	     i++)
+		;
+
+	DP(BNX2X_MSG_IOV, "next vf to cleanup: %d. num of vfs: %d\n", i,
+	   BNX2X_NR_VIRTFN(bp));
+
+	if (i < BNX2X_NR_VIRTFN(bp)) {
+		vf = BP_VF(bp, i);
+
+		/* lock the vf pf channel */
+		bnx2x_lock_vf_pf_channel(bp, vf, CHANNEL_TLV_FLR);
+
+		/* invoke the VF FLR SM */
+		if (bnx2x_vfop_flr_cmd(bp, vf, bnx2x_vf_flr_clnup)) {
+			BNX2X_ERR("VF[%d]: FLR cleanup failed -ENOMEM\n",
+				  vf->abs_vfid);
+
+			/* mark the VF to be ACKED and continue */
+			vf->flr_clnup_stage = VF_FLR_ACK;
+			goto next_vf_to_clean;
+		}
+		return;
+	}
+
+	/* we are done, update vf records */
+	for_each_vf(bp, i) {
+		vf = BP_VF(bp, i);
+
+		if (vf->flr_clnup_stage != VF_FLR_ACK)
+			continue;
+
+		vf->flr_clnup_stage = VF_FLR_EPILOG;
+	}
+
+	/* Acknowledge the handled VFs.
+	 * we are acknowledge all the vfs which an flr was requested for, even
+	 * if amongst them there are such that we never opened, since the mcp
+	 * will interrupt us immediately again if we only ack some of the bits,
+	 * resulting in an endless loop. This can happen for example in KVM
+	 * where an 'all ones' flr request is sometimes given by hyper visor
+	 */
+	DP(BNX2X_MSG_MCP, "DRV_STATUS_VF_DISABLED ACK for vfs 0x%x 0x%x\n",
+	   bp->vfdb->flrd_vfs[0], bp->vfdb->flrd_vfs[1]);
+	for (i = 0; i < FLRD_VFS_DWORDS; i++)
+		SHMEM2_WR(bp, drv_ack_vf_disabled[BP_FW_MB_IDX(bp)][i],
+			  bp->vfdb->flrd_vfs[i]);
+
+	bnx2x_fw_command(bp, DRV_MSG_CODE_VF_DISABLED_DONE, 0);
+
+	/* clear the acked bits - better yet if the MCP implemented
+	 * write to clear semantics
+	 */
+	for (i = 0; i < FLRD_VFS_DWORDS; i++)
+		SHMEM2_WR(bp, drv_ack_vf_disabled[BP_FW_MB_IDX(bp)][i], 0);
+}
+
+void bnx2x_vf_handle_flr_event(struct bnx2x *bp)
+{
+	int i;
+
+	/* Read FLR'd VFs */
+	for (i = 0; i < FLRD_VFS_DWORDS; i++)
+		bp->vfdb->flrd_vfs[i] = SHMEM2_RD(bp, mcp_vf_disabled[i]);
+
+	DP(BNX2X_MSG_MCP,
+	   "DRV_STATUS_VF_DISABLED received for vfs 0x%x 0x%x\n",
+	   bp->vfdb->flrd_vfs[0], bp->vfdb->flrd_vfs[1]);
+
+	for_each_vf(bp, i) {
+		struct bnx2x_virtf *vf = BP_VF(bp, i);
+		u32 reset = 0;
+
+		if (vf->abs_vfid < 32)
+			reset = bp->vfdb->flrd_vfs[0] & (1 << vf->abs_vfid);
+		else
+			reset = bp->vfdb->flrd_vfs[1] &
+				(1 << (vf->abs_vfid - 32));
+
+		if (reset) {
+			/* set as reset and ready for cleanup */
+			vf->state = VF_RESET;
+			vf->flr_clnup_stage = VF_FLR_CLN;
+
+			DP(BNX2X_MSG_IOV,
+			   "Initiating Final cleanup for VF %d\n",
+			   vf->abs_vfid);
+		}
+	}
+
+	/* do the FLR cleanup for all marked VFs*/
+	bnx2x_vf_flr_clnup(bp, NULL);
+}
 /* IOV global initialization routines  */
 void bnx2x_iov_init_dq(struct bnx2x *bp)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 3a750af..f3c60ef 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -690,9 +690,16 @@ int bnx2x_vfop_release_cmd(struct bnx2x *bp,
 void bnx2x_vf_release(struct bnx2x *bp, struct bnx2x_virtf *vf, bool block);
 int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid);
 u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf);
+
+/* FLR routines */
+
 /* VF FLR helpers */
 int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid);
 void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid);
+
+/* Handles an FLR (or VF_DISABLE) notification form the MCP */
+void bnx2x_vf_handle_flr_event(struct bnx2x *bp);
+
 void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type,
 		   u16 length);
 void bnx2x_vfpf_prep(struct bnx2x *bp, struct vfpf_first_tlv *first_tlv,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
index 6e06d00..299bbcc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
@@ -306,6 +306,7 @@ enum channel_tlvs {
 	   CHANNEL_TLV_RELEASE,
 	   CHANNEL_TLV_PF_RELEASE_VF,
 	   CHANNEL_TLV_LIST_END,
+	   CHANNEL_TLV_FLR,
 	   CHANNEL_TLV_MAX
 };
 
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 19/22] bnx2x: Support of PF driver of a VF release request
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

The 'release' request is the opposite of the 'acquire' request.
At release, all the resources allocated to the VF are reclaimed.
The release flow applies the close flow if applicable.
Note that there are actually two types of release:
1. The VF has been removed, and so issued a 'release' request
over the VF <-> PF Channel.
2. The PF is going down and so has to release all of it's VFs.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |    1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  123 +++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |   23 ++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |   25 ++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h  |    1 +
 5 files changed, 172 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 1f82d8b..4fa3e74 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -8611,6 +8611,7 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link)
 
 	netif_addr_unlock_bh(bp->dev);
 
+	bnx2x_iov_chip_cleanup(bp);
 
 
 	/*
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 888e3ec..3baa26d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1423,6 +1423,14 @@ bnx2x_iov_static_resc(struct bnx2x *bp, struct vf_pf_resc_request *resc)
 	/* num_sbs already set */
 }
 
+/* FLR routines: */
+static void bnx2x_vf_free_resc(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	/* reset the state variables */
+	bnx2x_iov_static_resc(bp, &vf->alloc_resc);
+	vf->state = VF_FREE;
+}
+
 /* called by bnx2x_init_hw_func, returns the next ilt line */
 
 /* IOV global initialization routines  */
@@ -1946,6 +1954,22 @@ int bnx2x_iov_nic_init(struct bnx2x *bp)
 	return 0;
 }
 
+/* called by bnx2x_chip_cleanup */
+int bnx2x_iov_chip_cleanup(struct bnx2x *bp)
+{
+	int i;
+
+	if (!IS_SRIOV(bp))
+		return 0;
+
+	/* release all the VFs */
+	for_each_vf(bp, i)
+		bnx2x_vf_release(bp, BP_VF(bp, i), true); /* blocking */
+
+	return 0;
+}
+
+/* called by bnx2x_init_hw_func, returns the next ilt line */
 int bnx2x_iov_init_ilt(struct bnx2x *bp, u16 line)
 {
 	int i;
@@ -2565,6 +2589,105 @@ int bnx2x_vfop_close_cmd(struct bnx2x *bp,
 	}
 	return -ENOMEM;
 }
+
+/* VF release can be called either: 1. the VF was acquired but
+ * not enabled 2. the vf was enabled or in the process of being
+ * enabled
+ */
+static void bnx2x_vfop_release(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vfop_release,
+		.block = false,
+	};
+
+	DP(BNX2X_MSG_IOV, "vfop->rc %d\n", vfop->rc);
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "VF[%d] STATE: %s\n", vf->abs_vfid,
+	   vf->state == VF_FREE ? "Free" :
+	   vf->state == VF_ACQUIRED ? "Acquired" :
+	   vf->state == VF_ENABLED ? "Enabled" :
+	   vf->state == VF_RESET ? "Reset" :
+	   "Unknown");
+
+	switch (vf->state) {
+	case VF_ENABLED:
+		vfop->rc = bnx2x_vfop_close_cmd(bp, vf, &cmd);
+		if (vfop->rc)
+			goto op_err;
+		return;
+
+	case VF_ACQUIRED:
+		DP(BNX2X_MSG_IOV, "about to free resources\n");
+		bnx2x_vf_free_resc(bp, vf);
+		DP(BNX2X_MSG_IOV, "vfop->rc %d\n", vfop->rc);
+		goto op_done;
+
+	case VF_FREE:
+	case VF_RESET:
+		/* do nothing */
+		goto op_done;
+	default:
+		bnx2x_vfop_default(vf->state);
+	}
+op_err:
+	BNX2X_ERR("VF[%d] RELEASE error: rc %d\n", vf->abs_vfid, vfop->rc);
+op_done:
+	bnx2x_vfop_end(bp, vf, vfop);
+}
+
+int bnx2x_vfop_release_cmd(struct bnx2x *bp,
+			   struct bnx2x_virtf *vf,
+			   struct bnx2x_vfop_cmd *cmd)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+	if (vfop) {
+		bnx2x_vfop_opset(-1, /* use vf->state */
+				 bnx2x_vfop_release, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_release,
+					     cmd->block);
+	}
+	return -ENOMEM;
+}
+
+/* VF release ~ VF close + VF release-resources
+ * Release is the ultimate SW shutdown and is called whenever an
+ * irrecoverable error is encountered.
+ */
+void bnx2x_vf_release(struct bnx2x *bp, struct bnx2x_virtf *vf, bool block)
+{
+	struct bnx2x_vfop_cmd cmd = {
+		.done = NULL,
+		.block = block,
+	};
+	int rc;
+	bnx2x_lock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_RELEASE_VF);
+	rc = bnx2x_vfop_release_cmd(bp, vf, &cmd);
+	if (rc)
+		WARN(rc,
+		     "VF[%d] Failed to allocate resources for release op- rc=%d\n",
+		     vf->abs_vfid, rc);
+}
+
+static inline void bnx2x_vf_get_sbdf(struct bnx2x *bp,
+			      struct bnx2x_virtf *vf, u32 *sbdf)
+{
+	*sbdf = vf->devfn | (vf->bus << 8);
+}
+
+static inline void bnx2x_vf_get_bars(struct bnx2x *bp, struct bnx2x_virtf *vf,
+		       struct bnx2x_vf_bar_info *bar_info)
+{
+	int n;
+	bar_info->nr_bars = bp->vfdb->sriov.nres;
+	for (n = 0; n < bar_info->nr_bars; n++)
+		bar_info->bars[n] = vf->bars[n];
+}
+
 void bnx2x_lock_vf_pf_channel(struct bnx2x *bp, struct bnx2x_virtf *vf,
 			      enum channel_tlvs tlv)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 9fadabc..3a750af 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -49,6 +49,12 @@ struct bnx2x_vf_bar {
 	u64 bar;
 	u32 size;
 };
+
+struct bnx2x_vf_bar_info {
+	struct bnx2x_vf_bar bars[PCI_SRIOV_NUM_BARS];
+	u8 nr_bars;
+};
+
 /* vf queue (used both for rx or tx) */
 struct bnx2x_vf_queue {
 	struct eth_context		*cxt;
@@ -430,6 +436,7 @@ void __devexit bnx2x_iov_remove_one(struct bnx2x *bp);
 void bnx2x_iov_free_mem(struct bnx2x *bp);
 int bnx2x_iov_alloc_mem(struct bnx2x *bp);
 int bnx2x_iov_nic_init(struct bnx2x *bp);
+int bnx2x_iov_chip_cleanup(struct bnx2x *bp);
 void bnx2x_iov_init_dq(struct bnx2x *bp);
 void bnx2x_iov_init_dmae(struct bnx2x *bp);
 void bnx2x_iov_set_queue_sp_obj(struct bnx2x *bp, int vf_cid,
@@ -547,6 +554,11 @@ static inline void bnx2x_vfop_end(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	if (vfop->done) {
 		DP(BNX2X_MSG_IOV, "calling done handler\n");
 		vfop->done(bp, vf);
+	} else {
+		/* there is no done handler for the operation to unlock
+		 * the mutex. Must have gotten here from PF initiated VF RELEASE
+		 */
+		bnx2x_unlock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_RELEASE_VF);
 	}
 
 	DP(BNX2X_MSG_IOV, "done handler complete. vf->op_rc %d, vfop->rc %d\n",
@@ -665,6 +677,17 @@ int bnx2x_vfop_close_cmd(struct bnx2x *bp,
 			 struct bnx2x_virtf *vf,
 			 struct bnx2x_vfop_cmd *cmd);
 
+int bnx2x_vfop_release_cmd(struct bnx2x *bp,
+			   struct bnx2x_virtf *vf,
+			   struct bnx2x_vfop_cmd *cmd);
+
+/**
+ * VF release ~ VF close + VF release-resources
+ *
+ * Release is the ultimate SW shutdown and is called whenever an
+ * irrecoverable error is encountered.
+ */
+void bnx2x_vf_release(struct bnx2x *bp, struct bnx2x_virtf *vf, bool block);
 int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid);
 u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf);
 /* VF FLR helpers */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 96a1de3..0955ba4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -233,7 +233,7 @@ static void bnx2x_vf_mbx_resp(struct bnx2x *bp, struct bnx2x_virtf *vf)
 		if (rc) {
 			BNX2X_ERR("Failed to copy response body to VF %d\n",
 				  vf->abs_vfid);
-			return;
+			goto mbx_error;
 		}
 		vf_addr -= sizeof(u64);
 		pf_addr -= sizeof(u64);
@@ -260,8 +260,12 @@ static void bnx2x_vf_mbx_resp(struct bnx2x *bp, struct bnx2x_virtf *vf)
 	if (rc) {
 		BNX2X_ERR("Failed to copy response status to VF %d\n",
 			  vf->abs_vfid);
+		goto mbx_error;
 	}
 	return;
+
+mbx_error:
+	bnx2x_vf_release(bp, vf, false); /* non blocking */
 }
 
 static void bnx2x_vf_mbx_acquire_resp(struct bnx2x *bp, struct bnx2x_virtf *vf,
@@ -835,6 +839,21 @@ static void bnx2x_vf_mbx_close_vf(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		bnx2x_vf_mbx_close_done(bp, vf);
 }
 
+static void bnx2x_vf_mbx_release_vf(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				    struct bnx2x_vf_mbx *mbx)
+{
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vf_mbx_resp,
+		.block = false,
+	};
+
+	DP(BNX2X_MSG_IOV, "VF[%d] VF_RELEASE\n", vf->abs_vfid);
+
+	vf->op_rc = bnx2x_vfop_release_cmd(bp, vf, &cmd);
+	if (vf->op_rc)
+		bnx2x_vf_mbx_resp(bp, vf);
+}
+
 /* dispatch request */
 static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				  struct bnx2x_vf_mbx *mbx)
@@ -869,6 +888,9 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		case CHANNEL_TLV_CLOSE:
 			bnx2x_vf_mbx_close_vf(bp, vf, mbx);
 			break;
+		case CHANNEL_TLV_RELEASE:
+			bnx2x_vf_mbx_release_vf(bp, vf, mbx);
+			break;
 		}
 
 	/* unknown TLV - this may belong to a VF driver from the future - a
@@ -965,6 +987,7 @@ void bnx2x_vf_mbx(struct bnx2x *bp, struct vf_pf_event_data *vfpf_event)
 	goto mbx_done;
 
 mbx_error:
+	bnx2x_vf_release(bp, vf, false); /* non blocking */
 mbx_done:
 	return;
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
index 95f5c2b..6e06d00 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
@@ -304,6 +304,7 @@ enum channel_tlvs {
 	   CHANNEL_TLV_TEARDOWN_Q,
 	   CHANNEL_TLV_CLOSE,
 	   CHANNEL_TLV_RELEASE,
+	   CHANNEL_TLV_PF_RELEASE_VF,
 	   CHANNEL_TLV_LIST_END,
 	   CHANNEL_TLV_MAX
 };
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 16/22] bnx2x: Support of PF driver of a VF q_filters request
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

The VF driver uses the 'q_filters' message on the VF <-> PF channel
for configuring an open queue, for example when the rxmode changes.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  294 ++++++++++++++++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |   32 +++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |  278 +++++++++++++++++++
 3 files changed, 598 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 22e01c6..82a9654 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -100,6 +100,8 @@ static void bnx2x_vf_igu_ack_sb(struct bnx2x *bp, struct bnx2x_virtf *vf,
 }
 /* VFOP - VF slow-path operation support */
 
+#define BNX2X_VFOP_FILTER_ADD_CNT_MAX		0x10000
+
 /* VFOP operations states */
 enum bnx2x_vfop_qctor_state {
 	   BNX2X_VFOP_QCTOR_INIT,
@@ -122,6 +124,17 @@ enum bnx2x_vfop_qsetup_state {
 	   BNX2X_VFOP_QSETUP_DONE
 };
 
+enum bnx2x_vfop_mcast_state {
+	   BNX2X_VFOP_MCAST_DEL,
+	   BNX2X_VFOP_MCAST_ADD,
+	   BNX2X_VFOP_MCAST_CHK_DONE
+};
+
+enum bnx2x_vfop_rxmode_state {
+	   BNX2X_VFOP_RXMODE_CONFIG,
+	   BNX2X_VFOP_RXMODE_DONE
+};
+
 #define bnx2x_vfop_reset_wq(vf)	atomic_set(&vf->op_in_progress, 0)
 
 void bnx2x_vfop_qctor_dump_tx(struct bnx2x *bp, struct bnx2x_virtf *vf,
@@ -571,6 +584,56 @@ bnx2x_vfop_vlan_mac_prep_ramrod(struct bnx2x_vlan_mac_ramrod_params *ramrod,
 	ureq->cmd = flags->add ? BNX2X_VLAN_MAC_ADD : BNX2X_VLAN_MAC_DEL;
 }
 
+static inline void
+bnx2x_vfop_mac_prep_ramrod(struct bnx2x_vlan_mac_ramrod_params *ramrod,
+			   struct bnx2x_vfop_vlan_mac_flags *flags)
+{
+	bnx2x_vfop_vlan_mac_prep_ramrod(ramrod, flags);
+	set_bit(BNX2X_ETH_MAC, &ramrod->user_req.vlan_mac_flags);
+}
+
+int bnx2x_vfop_mac_list_cmd(struct bnx2x *bp,
+			    struct bnx2x_virtf *vf,
+			    struct bnx2x_vfop_cmd *cmd,
+			    struct bnx2x_vfop_filters *macs,
+			    int qid, bool drv_only)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+	if (vfop) {
+		struct bnx2x_vfop_args_filters filters = {
+			.multi_filter = macs,
+			.credit = NULL,		/* consume credit */
+		};
+		struct bnx2x_vfop_vlan_mac_flags flags = {
+			.drv_only = drv_only,
+			.dont_consume = (filters.credit != NULL),
+			.single_cmd = false,
+			.add = false, /* don't care since only the items in the
+				       * filters list affect the sp operation,
+				       * not the list itself
+				       */
+		};
+		struct bnx2x_vlan_mac_ramrod_params *ramrod =
+			&vf->op_params.vlan_mac;
+
+		/* set ramrod params */
+		bnx2x_vfop_mac_prep_ramrod(ramrod, &flags);
+
+		/* set object */
+		ramrod->vlan_mac_obj = &bnx2x_vfq(vf, qid, mac_obj);
+
+		/* set extra args */
+		filters.multi_filter->add_cnt = BNX2X_VFOP_FILTER_ADD_CNT_MAX;
+		vfop->args.filters = filters;
+
+		bnx2x_vfop_opset(BNX2X_VFOP_MAC_CONFIG_LIST,
+				 bnx2x_vfop_vlan_mac, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_vlan_mac,
+					     cmd->block);
+	}
+	return -ENOMEM;
+}
 
 int bnx2x_vfop_vlan_set_cmd(struct bnx2x *bp,
 			    struct bnx2x_virtf *vf,
@@ -611,6 +674,48 @@ int bnx2x_vfop_vlan_set_cmd(struct bnx2x *bp,
 	return -ENOMEM;
 }
 
+int bnx2x_vfop_vlan_list_cmd(struct bnx2x *bp,
+			     struct bnx2x_virtf *vf,
+			     struct bnx2x_vfop_cmd *cmd,
+			     struct bnx2x_vfop_filters *vlans,
+			     int qid, bool drv_only)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+	if (vfop) {
+		struct bnx2x_vfop_args_filters filters = {
+			.multi_filter = vlans,
+			.credit = &bnx2x_vfq(vf, qid, vlan_count),
+		};
+		struct bnx2x_vfop_vlan_mac_flags flags = {
+			.drv_only = drv_only,
+			.dont_consume = (filters.credit != NULL),
+			.single_cmd = false,
+			.add = false, /* don't care */
+		};
+		struct bnx2x_vlan_mac_ramrod_params *ramrod =
+			&vf->op_params.vlan_mac;
+
+		/* set ramrod params */
+		bnx2x_vfop_vlan_mac_prep_ramrod(ramrod, &flags);
+
+		/* set object */
+		ramrod->vlan_mac_obj = &bnx2x_vfq(vf, qid, vlan_obj);
+
+		/* set extra args */
+		filters.multi_filter->add_cnt = vf_vlan_rules_cnt(vf) -
+			atomic_read(filters.credit);
+
+		vfop->args.filters = filters;
+
+		bnx2x_vfop_opset(BNX2X_VFOP_VLAN_CONFIG_LIST,
+				 bnx2x_vfop_vlan_mac, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_vlan_mac,
+					     cmd->block);
+	}
+	return -ENOMEM;
+}
+
 /* VFOP queue setup (queue constructor + set vlan 0) */
 static void bnx2x_vfop_qsetup(struct bnx2x *bp, struct bnx2x_virtf *vf)
 {
@@ -677,16 +782,180 @@ int bnx2x_vfop_qsetup_cmd(struct bnx2x *bp,
 }
 
 
-static u8 bnx2x_iov_get_max_queue_count(struct bnx2x *bp)
+/* VFOP multi-casts */
+static void bnx2x_vfop_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf)
 {
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	struct bnx2x_mcast_ramrod_params *mcast = &vfop->op_p->mcast;
+	struct bnx2x_raw_obj *raw = &mcast->mcast_obj->raw;
+	struct bnx2x_vfop_args_mcast *args = &vfop->args.mc_list;
+	enum bnx2x_vfop_mcast_state state = vfop->state;
 	int i;
-	u8 queue_count = 0;
 
-	if (IS_SRIOV(bp))
-		for_each_vf(bp, i)
-			queue_count += bnx2x_vf(bp, i, alloc_resc.num_sbs);
+	bnx2x_vfop_reset_wq(vf);
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "vf[%d] STATE: %d\n", vf->abs_vfid, state);
+
+	switch (state) {
+	case BNX2X_VFOP_MCAST_DEL:
+		/* clear existing mcasts */
+		vfop->state = BNX2X_VFOP_MCAST_ADD;
+		vfop->rc = bnx2x_config_mcast(bp, mcast, BNX2X_MCAST_CMD_DEL);
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_CONT);
+
+	case BNX2X_VFOP_MCAST_ADD:
+		if (raw->check_pending(raw))
+			goto op_pending;
+
+		if (args->mc_num) {
+			/* update mcast list on the ramrod params */
+			INIT_LIST_HEAD(&mcast->mcast_list);
+			for (i = 0; i < args->mc_num; i++)
+				list_add_tail(&(args->mc[i].link),
+					      &mcast->mcast_list);
+			/* add new mcasts */
+			vfop->state = BNX2X_VFOP_MCAST_CHK_DONE;
+			vfop->rc = bnx2x_config_mcast(bp, mcast,
+						      BNX2X_MCAST_CMD_ADD);
+		}
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE);
+
+	case BNX2X_VFOP_MCAST_CHK_DONE:
+		vfop->rc = raw->check_pending(raw) ? 1 : 0;
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE);
+	default:
+		bnx2x_vfop_default(state);
+	}
+op_err:
+	BNX2X_ERR("MCAST CONFIG error: rc %d\n", vfop->rc);
+op_done:
+	kfree(args->mc);
+	bnx2x_vfop_end(bp, vf, vfop);
+op_pending:
+	return;
+}
+
+int bnx2x_vfop_mcast_cmd(struct bnx2x *bp,
+			 struct bnx2x_virtf *vf,
+			 struct bnx2x_vfop_cmd *cmd,
+			 bnx2x_mac_addr_t *mcasts,
+			 int mcast_num, bool drv_only)
+{
+	struct bnx2x_vfop *vfop = NULL;
+	size_t mc_sz = mcast_num * sizeof(struct bnx2x_mcast_list_elem);
+	struct bnx2x_mcast_list_elem *mc = mc_sz ? kzalloc(mc_sz, GFP_KERNEL) :
+		NULL;
+
+	if (!mc_sz || mc) {
+		vfop = bnx2x_vfop_add(bp, vf);
+		if (vfop) {
+			int i;
+			struct bnx2x_mcast_ramrod_params *ramrod =
+				&vf->op_params.mcast;
+
+			/* set ramrod params */
+			memset(ramrod, 0, sizeof(*ramrod));
+			ramrod->mcast_obj = &vf->mcast_obj;
+			if (drv_only)
+				set_bit(RAMROD_DRV_CLR_ONLY,
+					&ramrod->ramrod_flags);
+
+			/* copy mcasts pointers */
+			vfop->args.mc_list.mc_num = mcast_num;
+			vfop->args.mc_list.mc = mc;
+			for (i = 0; i < mcast_num; i++)
+				mc[i].mac = mcasts[i];
+
+			bnx2x_vfop_opset(BNX2X_VFOP_MCAST_DEL,
+					 bnx2x_vfop_mcast, cmd->done);
+			return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_mcast,
+						     cmd->block);
+		} else {
+			kfree(mc);
+		}
+	}
+	return -ENOMEM;
+}
+
+/* VFOP rx-mode */
+static void bnx2x_vfop_rxmode(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	struct bnx2x_rx_mode_ramrod_params *ramrod = &vfop->op_p->rx_mode;
+	enum bnx2x_vfop_rxmode_state state = vfop->state;
+
+	bnx2x_vfop_reset_wq(vf);
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "vf[%d] STATE: %d\n", vf->abs_vfid, state);
+
+	switch (state) {
+	case BNX2X_VFOP_RXMODE_CONFIG:
+		/* next state */
+		vfop->state = BNX2X_VFOP_RXMODE_DONE;
+
+		vfop->rc = bnx2x_config_rx_mode(bp, ramrod);
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE);
+op_err:
+	BNX2X_ERR("RXMODE error: rc %d\n", vfop->rc);
+op_done:
+	case BNX2X_VFOP_RXMODE_DONE:
+		bnx2x_vfop_end(bp, vf, vfop);
+		return;
+	default:
+		bnx2x_vfop_default(state);
+	}
+op_pending:
+	return;
+}
+
+int bnx2x_vfop_rxmode_cmd(struct bnx2x *bp,
+			  struct bnx2x_virtf *vf,
+			  struct bnx2x_vfop_cmd *cmd,
+			  int qid, unsigned long accept_flags)
+{
+
+	struct bnx2x_vf_queue *vfq = vfq_get(vf, qid);
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+	if (vfop) {
+		struct bnx2x_rx_mode_ramrod_params *ramrod =
+			&vf->op_params.rx_mode;
+
+		memset(ramrod, 0, sizeof(*ramrod));
+
+		/* Prepare ramrod parameters */
+		ramrod->cid = vfq->cid;
+		ramrod->cl_id = vfq_cl_id(vf, vfq);
+		ramrod->rx_mode_obj = &bp->rx_mode_obj;
+		ramrod->func_id = FW_VF_HANDLE(vf->abs_vfid);
+
+		ramrod->rx_accept_flags = accept_flags;
+		ramrod->tx_accept_flags = accept_flags;
+		ramrod->pstate = &vf->filter_state;
+		ramrod->state = BNX2X_FILTER_RX_MODE_PENDING;
+
+		set_bit(BNX2X_FILTER_RX_MODE_PENDING, &vf->filter_state);
+		set_bit(RAMROD_RX, &ramrod->ramrod_flags);
+		set_bit(RAMROD_TX, &ramrod->ramrod_flags);
+
+		ramrod->rdata =
+			bnx2x_vf_sp(bp, vf, rx_mode_rdata.e2);
+		ramrod->rdata_mapping =
+			bnx2x_vf_sp_map(bp, vf, rx_mode_rdata.e2);
+
+		bnx2x_vfop_opset(BNX2X_VFOP_RXMODE_CONFIG,
+				 bnx2x_vfop_rxmode, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_rxmode,
+					     cmd->block);
+	}
+	return -ENOMEM;
 
-	return queue_count;
 }
 
 /* VF enable primitives
@@ -1055,6 +1324,19 @@ static int bnx2x_sriov_info(struct bnx2x *bp, struct bnx2x_sriov *iov)
 
 	return 0;
 }
+
+static u8 bnx2x_iov_get_max_queue_count(struct bnx2x *bp)
+{
+	int i;
+	u8 queue_count = 0;
+
+	if (IS_SRIOV(bp))
+		for_each_vf(bp, i)
+			queue_count += bnx2x_vf(bp, i, alloc_resc.num_sbs);
+
+	return queue_count;
+}
+
 /* must be called after PF bars are mapped */
 int __devinit bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param,
 				 int num_vfs_param)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 059715c..a18cef6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -442,6 +442,10 @@ void bnx2x_iov_sp_task(struct bnx2x *bp);
 /* global vf mailbox routines */
 void bnx2x_vf_mbx(struct bnx2x *bp, struct vf_pf_event_data *vfpf_event);
 void bnx2x_vf_enable_mbx(struct bnx2x *bp, u8 abs_vfid);
+
+/* CORE VF API */
+typedef u8 bnx2x_mac_addr_t[ETH_ALEN];
+
 /* acquire */
 int bnx2x_vf_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
 			  struct vf_pf_resc_request *resc);
@@ -619,11 +623,39 @@ void bnx2x_vfop_qctor_prep(struct bnx2x *bp,
 			   struct bnx2x_vf_queue *q,
 			   struct bnx2x_vfop_qctor_params *p,
 			   unsigned long q_type);
+int bnx2x_vfop_mac_list_cmd(struct bnx2x *bp,
+			    struct bnx2x_virtf *vf,
+			    struct bnx2x_vfop_cmd *cmd,
+			    struct bnx2x_vfop_filters *macs,
+			    int qid, bool drv_only);
+
+int bnx2x_vfop_vlan_set_cmd(struct bnx2x *bp,
+			    struct bnx2x_virtf *vf,
+			    struct bnx2x_vfop_cmd *cmd,
+			    int qid, u16 vid, bool add);
+
+int bnx2x_vfop_vlan_list_cmd(struct bnx2x *bp,
+			     struct bnx2x_virtf *vf,
+			     struct bnx2x_vfop_cmd *cmd,
+			     struct bnx2x_vfop_filters *vlans,
+			     int qid, bool drv_only);
+
 int bnx2x_vfop_qsetup_cmd(struct bnx2x *bp,
 			  struct bnx2x_virtf *vf,
 			  struct bnx2x_vfop_cmd *cmd,
 			  int qid);
 
+int bnx2x_vfop_mcast_cmd(struct bnx2x *bp,
+			 struct bnx2x_virtf *vf,
+			 struct bnx2x_vfop_cmd *cmd,
+			 bnx2x_mac_addr_t *mcasts,
+			 int mcast_num, bool drv_only);
+
+int bnx2x_vfop_rxmode_cmd(struct bnx2x *bp,
+			  struct bnx2x_virtf *vf,
+			  struct bnx2x_vfop_cmd *cmd,
+			  int qid, unsigned long accept_flags);
+
 int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid);
 u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf);
 /* VF FLR helpers */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index c198d16..8dddb02 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -519,6 +519,281 @@ static void bnx2x_vf_mbx_setup_q(struct bnx2x *bp, struct bnx2x_virtf *vf,
 response:
 	bnx2x_vf_mbx_resp(bp, vf);
 }
+
+enum bnx2x_vfop_filters_state {
+	   BNX2X_VFOP_MBX_Q_FILTERS_MACS,
+	   BNX2X_VFOP_MBX_Q_FILTERS_VLANS,
+	   BNX2X_VFOP_MBX_Q_FILTERS_RXMODE,
+	   BNX2X_VFOP_MBX_Q_FILTERS_MCAST,
+	   BNX2X_VFOP_MBX_Q_FILTERS_DONE
+};
+
+static int bnx2x_vf_mbx_macvlan_list(struct bnx2x *bp,
+				     struct bnx2x_virtf *vf,
+				     struct vfpf_set_q_filters_tlv *tlv,
+				     struct bnx2x_vfop_filters **pfl,
+				     u32 type_flag)
+{
+	int i, j;
+	struct bnx2x_vfop_filters *fl = NULL;
+	size_t fsz;
+
+	fsz = tlv->n_mac_vlan_filters * sizeof(struct bnx2x_vfop_filter) +
+		sizeof(struct bnx2x_vfop_filters);
+
+	fl = kzalloc(fsz, GFP_KERNEL);
+	if (!fl)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&fl->head);
+
+	for (i = 0, j = 0; i < tlv->n_mac_vlan_filters; i++) {
+		struct vfpf_q_mac_vlan_filter *msg_filter = &tlv->filters[i];
+
+		if ((msg_filter->flags & type_flag) != type_flag)
+			continue;
+		if (type_flag == VFPF_Q_FILTER_DEST_MAC_VALID) {
+			fl->filters[j].mac = msg_filter->mac;
+			fl->filters[j].type = BNX2X_VFOP_FILTER_MAC;
+		} else {
+			fl->filters[j].vid = msg_filter->vlan_tag;
+			fl->filters[j].type = BNX2X_VFOP_FILTER_VLAN;
+		}
+		fl->filters[j].add =
+			(msg_filter->flags & VFPF_Q_FILTER_SET_MAC) ?
+			true : false;
+		list_add_tail(&fl->filters[j++].link, &fl->head);
+	}
+	if (list_empty(&fl->head))
+		kfree(fl);
+	else
+		*pfl = fl;
+
+	return 0;
+}
+
+static void bnx2x_vf_mbx_dp_q_filter(struct bnx2x *bp, int msglvl, int idx,
+				       struct vfpf_q_mac_vlan_filter *filter)
+{
+	DP(msglvl, "MAC-VLAN[%d] -- flags=0x%x", idx, filter->flags);
+	if (filter->flags & VFPF_Q_FILTER_VLAN_TAG_VALID)
+		DP_CONT(msglvl, ", vlan=%d", filter->vlan_tag);
+	if (filter->flags & VFPF_Q_FILTER_DEST_MAC_VALID)
+		DP_CONT(msglvl, ", MAC=%pM", filter->mac);
+	DP_CONT(msglvl, "\n");
+}
+
+static void bnx2x_vf_mbx_dp_q_filters(struct bnx2x *bp, int msglvl,
+				       struct vfpf_set_q_filters_tlv *filters)
+{
+	int i;
+
+	if (filters->flags & VFPF_SET_Q_FILTERS_MAC_VLAN_CHANGED)
+		for (i = 0; i < filters->n_mac_vlan_filters; i++)
+			bnx2x_vf_mbx_dp_q_filter(bp, msglvl, i,
+						 &filters->filters[i]);
+
+	if (filters->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED)
+		DP(msglvl, "RX-MASK=0x%x\n", filters->rx_mask);
+
+	if (filters->flags & VFPF_SET_Q_FILTERS_MULTICAST_CHANGED)
+		for (i = 0; i < filters->n_multicast; i++)
+			DP(msglvl, "MULTICAST=%pM\n", filters->multicast[i]);
+}
+
+#define VFPF_MAC_FILTER		VFPF_Q_FILTER_DEST_MAC_VALID
+#define VFPF_VLAN_FILTER	VFPF_Q_FILTER_VLAN_TAG_VALID
+
+static void bnx2x_vfop_mbx_qfilters(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	int rc;
+
+	struct vfpf_set_q_filters_tlv *msg =
+		&BP_VF_MBX(bp, vf->index)->msg->req.set_q_filters;
+
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	enum bnx2x_vfop_filters_state state = vfop->state;
+
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vfop_mbx_qfilters,
+		.block = false,
+	};
+
+	DP(BNX2X_MSG_IOV, "STATE: %d\n", state);
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	switch (state) {
+	case BNX2X_VFOP_MBX_Q_FILTERS_MACS:
+		/* next state */
+		vfop->state = BNX2X_VFOP_MBX_Q_FILTERS_VLANS;
+
+		/* check for any vlan/mac changes */
+		if (msg->flags & VFPF_SET_Q_FILTERS_MAC_VLAN_CHANGED) {
+
+			/* build mac list */
+			struct bnx2x_vfop_filters *fl = NULL;
+			vfop->rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl,
+							     VFPF_MAC_FILTER);
+			if (vfop->rc)
+				goto op_err;
+
+			if (fl) {
+				/* set mac list */
+				rc = bnx2x_vfop_mac_list_cmd(bp, vf, &cmd, fl,
+							     msg->vf_qid,
+							     false);
+				if (rc) {
+					vfop->rc = rc;
+					goto op_err;
+				}
+				return;
+			}
+		}
+		/* fall through */
+
+	case BNX2X_VFOP_MBX_Q_FILTERS_VLANS:
+		/* next state */
+		vfop->state = BNX2X_VFOP_MBX_Q_FILTERS_RXMODE;
+
+		/* check for any vlan/mac changes */
+		if (msg->flags & VFPF_SET_Q_FILTERS_MAC_VLAN_CHANGED) {
+
+			/* build vlan list */
+			struct bnx2x_vfop_filters *fl = NULL;
+			vfop->rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl,
+							     VFPF_VLAN_FILTER);
+			if (vfop->rc)
+				goto op_err;
+
+			if (fl) {
+				/* set vlan list */
+				rc = bnx2x_vfop_vlan_list_cmd(bp, vf, &cmd, fl,
+							      msg->vf_qid,
+							      false);
+				if (rc) {
+					vfop->rc = rc;
+					goto op_err;
+				}
+				return;
+			}
+		}
+		/* fall through */
+
+	case BNX2X_VFOP_MBX_Q_FILTERS_RXMODE:
+		/* next state */
+		vfop->state = BNX2X_VFOP_MBX_Q_FILTERS_MCAST;
+
+		if (msg->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED) {
+			unsigned long accept = 0;
+
+			/* covert VF-PF if mask to bnx2x accept flags */
+			if (msg->rx_mask & VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST)
+				__set_bit(BNX2X_ACCEPT_UNICAST, &accept);
+
+			if (msg->rx_mask &
+					VFPF_RX_MASK_ACCEPT_MATCHED_MULTICAST)
+				__set_bit(BNX2X_ACCEPT_MULTICAST, &accept);
+
+			if (msg->rx_mask & VFPF_RX_MASK_ACCEPT_ALL_UNICAST)
+				__set_bit(BNX2X_ACCEPT_ALL_UNICAST, &accept);
+
+			if (msg->rx_mask & VFPF_RX_MASK_ACCEPT_ALL_MULTICAST)
+				__set_bit(BNX2X_ACCEPT_ALL_MULTICAST, &accept);
+
+			if (msg->rx_mask & VFPF_RX_MASK_ACCEPT_BROADCAST)
+				__set_bit(BNX2X_ACCEPT_BROADCAST, &accept);
+
+			/* A packet arriving the vf's mac should be accepted
+			 * with any vlan
+			 */
+			__set_bit(BNX2X_ACCEPT_ANY_VLAN, &accept);
+
+			/* set rx-mode */
+			rc = bnx2x_vfop_rxmode_cmd(bp, vf, &cmd,
+						   msg->vf_qid, accept);
+			if (rc) {
+				vfop->rc = rc;
+				goto op_err;
+			}
+			return;
+		}
+		/* fall through */
+
+	case BNX2X_VFOP_MBX_Q_FILTERS_MCAST:
+		/* next state */
+		vfop->state = BNX2X_VFOP_MBX_Q_FILTERS_DONE;
+
+		if (msg->flags & VFPF_SET_Q_FILTERS_MULTICAST_CHANGED) {
+			/* set mcasts */
+			rc = bnx2x_vfop_mcast_cmd(bp, vf, &cmd, msg->multicast,
+						  msg->n_multicast, false);
+			if (rc) {
+				vfop->rc = rc;
+				goto op_err;
+			}
+			return;
+		}
+		/* fall through */
+op_done:
+	case BNX2X_VFOP_MBX_Q_FILTERS_DONE:
+		bnx2x_vfop_end(bp, vf, vfop);
+		return;
+op_err:
+	BNX2X_ERR("QFILTERS[%d:%d] error: rc %d\n",
+		  vf->abs_vfid, msg->vf_qid, vfop->rc);
+	goto op_done;
+
+	default:
+		bnx2x_vfop_default(state);
+	}
+}
+
+static int bnx2x_vfop_mbx_qfilters_cmd(struct bnx2x *bp,
+					struct bnx2x_virtf *vf,
+					struct bnx2x_vfop_cmd *cmd)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+	if (vfop) {
+		bnx2x_vfop_opset(BNX2X_VFOP_MBX_Q_FILTERS_MACS,
+				 bnx2x_vfop_mbx_qfilters, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_mbx_qfilters,
+					     cmd->block);
+	}
+	return -ENOMEM;
+}
+
+static void bnx2x_vf_mbx_set_q_filters(struct bnx2x *bp,
+				       struct bnx2x_virtf *vf,
+				       struct bnx2x_vf_mbx *mbx)
+{
+	struct vfpf_set_q_filters_tlv *filters = &mbx->msg->req.set_q_filters;
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vf_mbx_resp,
+		.block = false,
+	};
+
+
+	/* verify vf_qid */
+	if (filters->vf_qid > vf_rxq_count(vf))
+		goto response;
+
+	DP(BNX2X_MSG_IOV, "VF[%d] Q_FILTERS: queue[%d]\n",
+	   vf->abs_vfid,
+	   filters->vf_qid);
+
+	/* print q_filter message */
+	bnx2x_vf_mbx_dp_q_filters(bp, BNX2X_MSG_IOV, filters);
+
+	vf->op_rc = bnx2x_vfop_mbx_qfilters_cmd(bp, vf, &cmd);
+	if (vf->op_rc)
+		goto response;
+	return;
+
+response:
+	bnx2x_vf_mbx_resp(bp, vf);
+}
 /* dispatch request */
 static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				  struct bnx2x_vf_mbx *mbx)
@@ -544,6 +819,9 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		case CHANNEL_TLV_SETUP_Q:
 			bnx2x_vf_mbx_setup_q(bp, vf, mbx);
 			break;
+		case CHANNEL_TLV_SET_Q_FILTERS:
+			bnx2x_vf_mbx_set_q_filters(bp, vf, mbx);
+			break;
 		}
 
 	/* unknown TLV - this may belong to a VF driver from the future - a
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 22/22] bnx2x: Add VF device ids and enable feature
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

Add the various VF device ids (of all supported hardware)
Add the calls to enable_sriov and disable_sriov to enable the
SR-IOV feature. This patch also advances the version and release
date of the bnx2x module.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |   22 +++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |   86 +++++++++++++++++++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |    4 +
 3 files changed, 102 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 91beeaf..df9979d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -26,8 +26,8 @@
  * (you will need to reboot afterwards) */
 /* #define BNX2X_STOP_ON_ERROR */
 
-#define DRV_MODULE_VERSION      "1.78.00-0"
-#define DRV_MODULE_RELDATE      "2012/09/27"
+#define DRV_MODULE_VERSION      "1.78.01-0"
+#define DRV_MODULE_RELDATE      "2012/10/30"
 #define BNX2X_BC_VER            0x040200
 
 #if defined(CONFIG_DCB)
@@ -805,36 +805,46 @@ struct bnx2x_common {
 #define CHIP_NUM_57711E			0x1650
 #define CHIP_NUM_57712			0x1662
 #define CHIP_NUM_57712_MF		0x1663
+#define CHIP_NUM_57712_VF		0x166f
 #define CHIP_NUM_57713			0x1651
 #define CHIP_NUM_57713E			0x1652
 #define CHIP_NUM_57800			0x168a
 #define CHIP_NUM_57800_MF		0x16a5
+#define CHIP_NUM_57800_VF		0x16a9
 #define CHIP_NUM_57810			0x168e
 #define CHIP_NUM_57810_MF		0x16ae
+#define CHIP_NUM_57810_VF		0x16af
 #define CHIP_NUM_57811			0x163d
 #define CHIP_NUM_57811_MF		0x163e
+#define CHIP_NUM_57811_VF		0x163f
 #define CHIP_NUM_57840_OBSOLETE	0x168d
 #define CHIP_NUM_57840_MF_OBSOLETE	0x16ab
 #define CHIP_NUM_57840_4_10		0x16a1
 #define CHIP_NUM_57840_2_20		0x16a2
 #define CHIP_NUM_57840_MF		0x16a4
+#define CHIP_NUM_57840_VF		0x16ad
 #define CHIP_IS_E1(bp)			(CHIP_NUM(bp) == CHIP_NUM_57710)
 #define CHIP_IS_57711(bp)		(CHIP_NUM(bp) == CHIP_NUM_57711)
 #define CHIP_IS_57711E(bp)		(CHIP_NUM(bp) == CHIP_NUM_57711E)
 #define CHIP_IS_57712(bp)		(CHIP_NUM(bp) == CHIP_NUM_57712)
+#define CHIP_IS_57712_VF(bp)		(CHIP_NUM(bp) == CHIP_NUM_57712_VF)
 #define CHIP_IS_57712_MF(bp)		(CHIP_NUM(bp) == CHIP_NUM_57712_MF)
 #define CHIP_IS_57800(bp)		(CHIP_NUM(bp) == CHIP_NUM_57800)
 #define CHIP_IS_57800_MF(bp)		(CHIP_NUM(bp) == CHIP_NUM_57800_MF)
+#define CHIP_IS_57800_VF(bp)		(CHIP_NUM(bp) == CHIP_NUM_57800_VF)
 #define CHIP_IS_57810(bp)		(CHIP_NUM(bp) == CHIP_NUM_57810)
 #define CHIP_IS_57810_MF(bp)		(CHIP_NUM(bp) == CHIP_NUM_57810_MF)
+#define CHIP_IS_57810_VF(bp)		(CHIP_NUM(bp) == CHIP_NUM_57810_VF)
 #define CHIP_IS_57811(bp)		(CHIP_NUM(bp) == CHIP_NUM_57811)
 #define CHIP_IS_57811_MF(bp)		(CHIP_NUM(bp) == CHIP_NUM_57811_MF)
+#define CHIP_IS_57811_VF(bp)		(CHIP_NUM(bp) == CHIP_NUM_57811_VF)
 #define CHIP_IS_57840(bp)		\
 		((CHIP_NUM(bp) == CHIP_NUM_57840_4_10) || \
 		 (CHIP_NUM(bp) == CHIP_NUM_57840_2_20) || \
 		 (CHIP_NUM(bp) == CHIP_NUM_57840_OBSOLETE))
 #define CHIP_IS_57840_MF(bp)	((CHIP_NUM(bp) == CHIP_NUM_57840_MF) || \
 				 (CHIP_NUM(bp) == CHIP_NUM_57840_MF_OBSOLETE))
+#define CHIP_IS_57840_VF(bp)		(CHIP_NUM(bp) == CHIP_NUM_57840_VF)
 #define CHIP_IS_E1H(bp)			(CHIP_IS_57711(bp) || \
 					 CHIP_IS_57711E(bp))
 #define CHIP_IS_E2(bp)			(CHIP_IS_57712(bp) || \
@@ -843,10 +853,13 @@ struct bnx2x_common {
 					 CHIP_IS_57800_MF(bp) || \
 					 CHIP_IS_57810(bp) || \
 					 CHIP_IS_57810_MF(bp) || \
+					 CHIP_IS_57810_VF(bp) || \
 					 CHIP_IS_57811(bp) || \
 					 CHIP_IS_57811_MF(bp) || \
+					 CHIP_IS_57811_VF(bp) || \
 					 CHIP_IS_57840(bp) || \
-					 CHIP_IS_57840_MF(bp))
+					 CHIP_IS_57840_MF(bp) || \
+					 CHIP_IS_57840_VF(bp))
 #define CHIP_IS_E1x(bp)			(CHIP_IS_E1((bp)) || CHIP_IS_E1H((bp)))
 #define USES_WARPCORE(bp)		(CHIP_IS_E3(bp))
 #define IS_E1H_OFFSET			(!CHIP_IS_E1(bp))
@@ -1198,8 +1211,9 @@ struct bnx2x_fw_stats_data {
 enum {
 	BNX2X_SP_RTNL_SETUP_TC,
 	BNX2X_SP_RTNL_TX_TIMEOUT,
-	BNX2X_SP_RTNL_AFEX_F_UPDATE,
 	BNX2X_SP_RTNL_FAN_FAILURE,
+	BNX2X_SP_RTNL_AFEX_F_UPDATE,
+	BNX2X_SP_RTNL_ENABLE_SRIOV,
 	BNX2X_SP_RTNL_VFPF_MCAST,
 	BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
 };
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 5c077fc..37b9566 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -193,12 +193,18 @@ static struct {
 #ifndef PCI_DEVICE_ID_NX2_57712_MF
 #define PCI_DEVICE_ID_NX2_57712_MF	CHIP_NUM_57712_MF
 #endif
+#ifndef PCI_DEVICE_ID_NX2_57712_VF
+#define PCI_DEVICE_ID_NX2_57712_VF	CHIP_NUM_57712_VF
+#endif
 #ifndef PCI_DEVICE_ID_NX2_57800
 #define PCI_DEVICE_ID_NX2_57800		CHIP_NUM_57800
 #endif
 #ifndef PCI_DEVICE_ID_NX2_57800_MF
 #define PCI_DEVICE_ID_NX2_57800_MF	CHIP_NUM_57800_MF
 #endif
+#ifndef PCI_DEVICE_ID_NX2_57800_VF
+#define PCI_DEVICE_ID_NX2_57800_VF	CHIP_NUM_57800_VF
+#endif
 #ifndef PCI_DEVICE_ID_NX2_57810
 #define PCI_DEVICE_ID_NX2_57810		CHIP_NUM_57810
 #endif
@@ -208,6 +214,9 @@ static struct {
 #ifndef PCI_DEVICE_ID_NX2_57840_O
 #define PCI_DEVICE_ID_NX2_57840_O	CHIP_NUM_57840_OBSOLETE
 #endif
+#ifndef PCI_DEVICE_ID_NX2_57810_VF
+#define PCI_DEVICE_ID_NX2_57810_VF	CHIP_NUM_57810_VF
+#endif
 #ifndef PCI_DEVICE_ID_NX2_57840_4_10
 #define PCI_DEVICE_ID_NX2_57840_4_10	CHIP_NUM_57840_4_10
 #endif
@@ -220,29 +229,42 @@ static struct {
 #ifndef PCI_DEVICE_ID_NX2_57840_MF
 #define PCI_DEVICE_ID_NX2_57840_MF	CHIP_NUM_57840_MF
 #endif
+#ifndef PCI_DEVICE_ID_NX2_57840_VF
+#define PCI_DEVICE_ID_NX2_57840_VF	CHIP_NUM_57840_VF
+#endif
 #ifndef PCI_DEVICE_ID_NX2_57811
 #define PCI_DEVICE_ID_NX2_57811		CHIP_NUM_57811
 #endif
 #ifndef PCI_DEVICE_ID_NX2_57811_MF
 #define PCI_DEVICE_ID_NX2_57811_MF	CHIP_NUM_57811_MF
 #endif
+#ifndef PCI_DEVICE_ID_NX2_57811_VF
+#define PCI_DEVICE_ID_NX2_57811_VF	CHIP_NUM_57811_VF
+#endif
+
+
 static DEFINE_PCI_DEVICE_TABLE(bnx2x_pci_tbl) = {
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57710), BCM57710 },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57711), BCM57711 },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57711E), BCM57711E },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57712), BCM57712 },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57712_MF), BCM57712_MF },
+	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57712_VF), BCM57712_VF },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57800), BCM57800 },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57800_MF), BCM57800_MF },
+	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57800_VF), BCM57800_VF },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57810), BCM57810 },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57810_MF), BCM57810_MF },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_O), BCM57840_O },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_4_10), BCM57840_4_10 },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_2_20), BCM57840_2_20 },
+	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57810_VF), BCM57810_VF },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_MFO), BCM57840_MFO },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_MF), BCM57840_MF },
+	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_VF), BCM57840_VF },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57811), BCM57811 },
 	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57811_MF), BCM57811_MF },
+	{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57811_VF), BCM57811_VF },
 	{ 0 }
 };
 
@@ -9407,8 +9429,10 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
 
 	rtnl_lock();
 
-	if (!netif_running(bp->dev))
-		goto sp_rtnl_exit;
+	if (!netif_running(bp->dev)) {
+		rtnl_unlock();
+		return;
+	}
 
 	/* if stop on error is defined no recovery flows should be executed */
 #ifdef BNX2X_STOP_ON_ERROR
@@ -9427,7 +9451,8 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
 
 		bnx2x_parity_recover(bp);
 
-		goto sp_rtnl_exit;
+		rtnl_unlock();
+		return;
 	}
 
 	if (test_and_clear_bit(BNX2X_SP_RTNL_TX_TIMEOUT, &bp->sp_rtnl_state)) {
@@ -9441,7 +9466,8 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
 		bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
 		bnx2x_nic_load(bp, LOAD_NORMAL);
 
-		goto sp_rtnl_exit;
+		rtnl_unlock();
+		return;
 	}
 #ifdef BNX2X_STOP_ON_ERROR
 sp_rtnl_not_reset:
@@ -9459,6 +9485,8 @@ sp_rtnl_not_reset:
 		DP(NETIF_MSG_HW, "fan failure detected. Unloading driver\n");
 		netif_device_detach(bp->dev);
 		bnx2x_close(bp->dev);
+		rtnl_unlock();
+		return;
 	}
 
 	if (test_and_clear_bit(BNX2X_SP_RTNL_VFPF_MCAST, &bp->sp_rtnl_state)) {
@@ -9474,8 +9502,28 @@ sp_rtnl_not_reset:
 		bnx2x_vfpf_storm_rx_mode(bp);
 	}
 
-sp_rtnl_exit:
+	/* work which needs rtnl lock not-taken (as it takes the lock itself and
+	 * can be called from other contexts as well)
+	 */
+
 	rtnl_unlock();
+
+	if (IS_SRIOV(bp) && test_and_clear_bit(BNX2X_SP_RTNL_ENABLE_SRIOV,
+					       &bp->sp_rtnl_state)) {
+		int rc = 0;
+
+		/* disbale sriov in case it is still enabled */
+		pci_disable_sriov(bp->pdev);
+		DP(BNX2X_MSG_IOV, "sriov disabled");
+
+		/* enable sriov */
+		DP(BNX2X_MSG_IOV, "vf num (%d)\n", (bp->vfdb->sriov.nr_virtfn));
+		rc = pci_enable_sriov(bp->pdev, (bp->vfdb->sriov.nr_virtfn));
+		if (rc)
+			BNX2X_ERR("pci_enable_sriov failed with %d\n", rc);
+		else
+			DP(BNX2X_MSG_IOV, "sriov enabled");
+	}
 }
 
 /* end of nic load/unload */
@@ -11274,6 +11322,26 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
  * net_device service functions
  */
 
+static int bnx2x_open_epilog(struct bnx2x *bp)
+{
+	/* Enable sriov via delayed work. This must be done via delayed work
+	 * because it causes the probe of the vf devices to be run, which invoke
+	 * register_netdevice which must have rtnl lock taken. As we are holding
+	 * the lock right now, that could only work if the probe would not take
+	 * the lock. However, as the probe of the vf may be called from other
+	 * contexts as well (such as passthrough to vm failes) it can't assume
+	 * the lock is being held for it. Using delayed work here allows the
+	 * probe code to simply take the lock (i.e. wait for it to be released
+	 * if it is being held).
+	 */
+	smp_mb__before_clear_bit();
+	set_bit(BNX2X_SP_RTNL_ENABLE_SRIOV, &bp->sp_rtnl_state);
+	smp_mb__after_clear_bit();
+	schedule_delayed_work(&bp->sp_rtnl_task, 0);
+
+	return 0;
+}
+
 /* called with rtnl_lock */
 static int bnx2x_open(struct net_device *dev)
 {
@@ -11281,6 +11349,7 @@ static int bnx2x_open(struct net_device *dev)
 	bool global = false;
 	int other_engine = BP_PATH(bp) ? 0 : 1;
 	bool other_load_status, load_status;
+	int rc;
 
 	bp->stats_init = true;
 
@@ -11338,7 +11407,12 @@ static int bnx2x_open(struct net_device *dev)
 		} while (0);
 
 	bp->recovery_state = BNX2X_RECOVERY_DONE;
-	return bnx2x_nic_load(bp, LOAD_OPEN);
+
+	rc = bnx2x_nic_load(bp, LOAD_OPEN);
+	if (rc)
+		return rc;
+
+	return bnx2x_open_epilog(bp);
 }
 
 /* called with rtnl_lock */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 41a56eb..c0728fc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -2029,6 +2029,10 @@ void __devexit bnx2x_iov_remove_one(struct bnx2x *bp)
 	if (!IS_SRIOV(bp))
 		return;
 
+	DP(BNX2X_MSG_IOV, "about to call disable sriov");
+	pci_disable_sriov(bp->pdev);
+	DP(BNX2X_MSG_IOV, "sriov disabled");
+
 	/* free vf database */
 	__bnx2x_iov_free_vfdb(bp);
 }
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 15/22] bnx2x: Support of PF driver of a VF setup_q request
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

Upon receiving a 'setup_q' request from the VF over the VF <-> PF
channel the PF driver will open a corresponding queue in the
device. The PF driver configures the queue with appropriate mac
address, vlan configuration, etc from the VF.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |    1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   |   19 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  899 +++++++++++++++++----
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |  176 ++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |  145 ++++
 5 files changed, 1069 insertions(+), 171 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 4d89ef6..88dbf02 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -972,6 +972,7 @@ extern struct workqueue_struct *bnx2x_wq;
 #define BNX2X_MAX_NUM_OF_VFS	64
 #define BNX2X_VF_CID_WND	0
 #define BNX2X_CIDS_PER_VF	(1 << BNX2X_VF_CID_WND)
+#define BNX2X_CLIENTS_PER_VF	1
 #define BNX2X_FIRST_VF_CID	256
 #define BNX2X_VF_CIDS		(BNX2X_MAX_NUM_OF_VFS * BNX2X_CIDS_PER_VF)
 #define BNX2X_VF_ID_INVALID	0xFF
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 27a0b28..df65695 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2019,7 +2019,7 @@ static void bnx2x_free_fw_stats_mem(struct bnx2x *bp)
 
 static int bnx2x_alloc_fw_stats_mem(struct bnx2x *bp)
 {
-	int num_groups;
+	int num_groups, vf_headroom = 0;
 	int is_fcoe_stats = NO_FCOE(bp) ? 0 : 1;
 
 	/* number of queues for statistics is number of eth queues + FCoE */
@@ -2031,18 +2031,27 @@ static int bnx2x_alloc_fw_stats_mem(struct bnx2x *bp)
 	 * for fcoe l2 queue if applicable)
 	 */
 	bp->fw_stats_num = 2 + is_fcoe_stats + num_queue_stats;
+
+	/* vf stats appear in the request list, but their data is allocated by
+	 * the VFs themselves. We don't include them in the bp->fw_stats_num as
+	 * it is used to determine where to place the vf stats queries in the
+	 * request struct
+	 */
+	if (IS_SRIOV(bp))
+		vf_headroom = bp->vfdb->sriov.nr_virtfn * BNX2X_CLIENTS_PER_VF;
+
 	/* Request is built from stats_query_header and an array of
 	 * stats_query_cmd_group each of which contains
 	 * STATS_QUERY_CMD_COUNT rules. The real number or requests is
 	 * configured in the stats_query_header.
 	 */
 	num_groups =
-		(((bp->fw_stats_num) / STATS_QUERY_CMD_COUNT) +
-		 (((bp->fw_stats_num) % STATS_QUERY_CMD_COUNT) ?
+		(((bp->fw_stats_num + vf_headroom) / STATS_QUERY_CMD_COUNT) +
+		 (((bp->fw_stats_num + vf_headroom) % STATS_QUERY_CMD_COUNT) ?
 		 1 : 0));
 
-	DP(BNX2X_MSG_SP, "stats fw_stats_num %d, num_groups %d",
-	   bp->fw_stats_num, num_groups);
+	DP(BNX2X_MSG_SP, "stats fw_stats_num %d, vf headroom %d, num_groups %d",
+	   bp->fw_stats_num, vf_headroom, num_groups);
 
 	bp->fw_stats_req_sz = sizeof(struct stats_query_header) +
 		num_groups * sizeof(struct stats_query_cmd_group);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index f027332..22e01c6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -96,10 +96,236 @@ static void bnx2x_vf_igu_ack_sb(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	REG_WR(bp, igu_addr_ctl, ctl);
 	mmiowb();
 	barrier();
+
 }
-static int bnx2x_ari_enabled(struct pci_dev *dev)
+/* VFOP - VF slow-path operation support */
+
+/* VFOP operations states */
+enum bnx2x_vfop_qctor_state {
+	   BNX2X_VFOP_QCTOR_INIT,
+	   BNX2X_VFOP_QCTOR_SETUP,
+	   BNX2X_VFOP_QCTOR_INT_EN
+};
+
+enum bnx2x_vfop_vlan_mac_state {
+	   BNX2X_VFOP_VLAN_MAC_CONFIG_SINGLE,
+	   BNX2X_VFOP_VLAN_MAC_CLEAR,
+	   BNX2X_VFOP_VLAN_MAC_CHK_DONE,
+	   BNX2X_VFOP_MAC_CONFIG_LIST,
+	   BNX2X_VFOP_VLAN_CONFIG_LIST,
+	   BNX2X_VFOP_VLAN_CONFIG_LIST_0
+};
+
+enum bnx2x_vfop_qsetup_state {
+	   BNX2X_VFOP_QSETUP_CTOR,
+	   BNX2X_VFOP_QSETUP_VLAN0,
+	   BNX2X_VFOP_QSETUP_DONE
+};
+
+#define bnx2x_vfop_reset_wq(vf)	atomic_set(&vf->op_in_progress, 0)
+
+void bnx2x_vfop_qctor_dump_tx(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			      struct bnx2x_queue_init_params *init_params,
+			      struct bnx2x_queue_setup_params *setup_params,
+			      u16 q_idx, u16 sb_idx)
 {
-	return dev->bus->self && dev->bus->self->ari_enabled;
+	DP(BNX2X_MSG_IOV,
+	   "VF[%d] Q_SETUP: txq[%d]-- vfsb=%d, sb-index=%d, hc-rate=%d, flags=0x%lx, traffic-type=%d",
+	   vf->abs_vfid,
+	   q_idx,
+	   sb_idx,
+	   init_params->tx.sb_cq_index,
+	   init_params->tx.hc_rate,
+	   setup_params->flags,
+	   setup_params->txq_params.traffic_type);
+}
+
+void bnx2x_vfop_qctor_dump_rx(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			    struct bnx2x_queue_init_params *init_params,
+			    struct bnx2x_queue_setup_params *setup_params,
+			    u16 q_idx, u16 sb_idx)
+{
+	struct bnx2x_rxq_setup_params *rxq_params = &setup_params->rxq_params;
+
+	DP(BNX2X_MSG_IOV, "VF[%d] Q_SETUP: rxq[%d]-- vfsb=%d, sb-index=%d, hc-rate=%d, mtu=%d, buf-size=%d\n"
+	   "sge-size=%d, max_sge_pkt=%d, tpa-agg-size=%d, flags=0x%lx, drop-flags=0x%x, cache-log=%d\n",
+	   vf->abs_vfid,
+	   q_idx,
+	   sb_idx,
+	   init_params->rx.sb_cq_index,
+	   init_params->rx.hc_rate,
+	   setup_params->gen_params.mtu,
+	   rxq_params->buf_sz,
+	   rxq_params->sge_buf_sz,
+	   rxq_params->max_sges_pkt,
+	   rxq_params->tpa_agg_sz,
+	   setup_params->flags,
+	   rxq_params->drop_flags,
+	   rxq_params->cache_line_log);
+}
+
+void bnx2x_vfop_qctor_prep(struct bnx2x *bp,
+			   struct bnx2x_virtf *vf,
+			   struct bnx2x_vf_queue *q,
+			   struct bnx2x_vfop_qctor_params *p,
+			   unsigned long q_type)
+{
+
+	struct bnx2x_queue_init_params *init_p = &p->qstate.params.init;
+	struct bnx2x_queue_setup_params *setup_p = &p->prep_qsetup;
+
+	/* INIT */
+
+	/* Enable host coalescing in the transition to INIT state */
+	if (test_bit(BNX2X_Q_FLG_HC, &init_p->rx.flags))
+		__set_bit(BNX2X_Q_FLG_HC_EN, &init_p->rx.flags);
+
+	if (test_bit(BNX2X_Q_FLG_HC, &init_p->tx.flags))
+		__set_bit(BNX2X_Q_FLG_HC_EN, &init_p->tx.flags);
+
+	/* FW SB ID */
+	init_p->rx.fw_sb_id = vf_igu_sb(vf, q->sb_idx);
+	init_p->tx.fw_sb_id = vf_igu_sb(vf, q->sb_idx);
+
+	/* context */
+	init_p->cxts[0] = q->cxt;
+
+	/* SETUP */
+
+	/* Setup-op general parameters */
+	setup_p->gen_params.spcl_id = vf->sp_cl_id;
+	setup_p->gen_params.stat_id = vfq_stat_id(vf, q);
+
+	/* Setup-op pause params:
+	 * Nothing to do, the pause thresholds are set by default to 0 which
+	 * effectively turns off the feature for this queue. We don't want
+	 * one queue (VF) to interfering with another queue (another VF)
+	 */
+	if (vf->cfg_flags & VF_CFG_FW_FC)
+		BNX2X_ERR("No support for pause to VFs (abs_vfid: %d)\n",
+			  vf->abs_vfid);
+	/* Setup-op flags:
+	 * collect statistics, zero statistics, local-switching, security,
+	 * OV for Flex10, RSS and MCAST for leading
+	 */
+	if (test_bit(BNX2X_Q_FLG_STATS, &setup_p->flags))
+		__set_bit(BNX2X_Q_FLG_ZERO_STATS, &setup_p->flags);
+
+	/* for VFs, enable tx switching, bd coherency, and mac address
+	 * anti-spoofing
+	 */
+	__set_bit(BNX2X_Q_FLG_TX_SWITCH, &setup_p->flags);
+	__set_bit(BNX2X_Q_FLG_TX_SEC, &setup_p->flags);
+	__set_bit(BNX2X_Q_FLG_ANTI_SPOOF, &setup_p->flags);
+
+	if (vfq_is_leading(q)) {
+		__set_bit(BNX2X_Q_FLG_LEADING_RSS, &setup_p->flags);
+		__set_bit(BNX2X_Q_FLG_MCAST, &setup_p->flags);
+	}
+
+	/* Setup-op rx parameters */
+	if (test_bit(BNX2X_Q_TYPE_HAS_RX, &q_type)) {
+		struct bnx2x_rxq_setup_params *rxq_p = &setup_p->rxq_params;
+
+		rxq_p->cl_qzone_id = vfq_qzone_id(vf, q);
+		rxq_p->fw_sb_id = vf_igu_sb(vf, q->sb_idx);
+		rxq_p->rss_engine_id = FW_VF_HANDLE(vf->abs_vfid);
+
+		if (test_bit(BNX2X_Q_FLG_TPA, &setup_p->flags))
+			rxq_p->max_tpa_queues = BNX2X_VF_MAX_TPA_AGG_QUEUES;
+	}
+
+	/* Setup-op tx parameters */
+	if (test_bit(BNX2X_Q_TYPE_HAS_TX, &q_type)) {
+		setup_p->txq_params.tss_leading_cl_id = vf->leading_rss;
+		setup_p->txq_params.fw_sb_id = vf_igu_sb(vf, q->sb_idx);
+	}
+}
+
+/* VFOP queue construction */
+static void bnx2x_vfop_qctor(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	struct bnx2x_vfop_args_qctor *args = &vfop->args.qctor;
+	struct bnx2x_queue_state_params *q_params = &vfop->op_p->qctor.qstate;
+	enum bnx2x_vfop_qctor_state state = vfop->state;
+
+	bnx2x_vfop_reset_wq(vf);
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "vf[%d] STATE: %d\n", vf->abs_vfid, state);
+
+	switch (state) {
+	case BNX2X_VFOP_QCTOR_INIT:
+
+		/* has this queue already been opened? */
+		if (bnx2x_get_q_logical_state(bp, q_params->q_obj) ==
+		    BNX2X_Q_LOGICAL_STATE_ACTIVE) {
+			DP(BNX2X_MSG_IOV,
+			   "Entered qctor but queue was already up. Aborting gracefully\n");
+			goto op_done;
+		}
+
+		/* next state */
+		vfop->state = BNX2X_VFOP_QCTOR_SETUP;
+
+		q_params->cmd = BNX2X_Q_CMD_INIT;
+		vfop->rc = bnx2x_queue_state_change(bp, q_params);
+
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_CONT);
+
+	case BNX2X_VFOP_QCTOR_SETUP:
+		/* next state */
+		vfop->state = BNX2X_VFOP_QCTOR_INT_EN;
+
+		/* copy pre-prepared setup params to the queue-state params */
+		vfop->op_p->qctor.qstate.params.setup =
+			vfop->op_p->qctor.prep_qsetup;
+
+		q_params->cmd = BNX2X_Q_CMD_SETUP;
+		vfop->rc = bnx2x_queue_state_change(bp, q_params);
+
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_CONT);
+
+	case BNX2X_VFOP_QCTOR_INT_EN:
+
+		/* enable interrupts */
+		bnx2x_vf_igu_ack_sb(bp, vf, vf_igu_sb(vf, args->sb_idx),
+				    USTORM_ID, 0, IGU_INT_ENABLE, 0);
+		goto op_done;
+	default:
+		bnx2x_vfop_default(state);
+	}
+op_err:
+	BNX2X_ERR("QCTOR[%d:%d] error: cmd %d, rc %d\n",
+		  vf->abs_vfid, args->qid, q_params->cmd, vfop->rc);
+op_done:
+	bnx2x_vfop_end(bp, vf, vfop);
+op_pending:
+	return;
+}
+
+static int bnx2x_vfop_qctor_cmd(struct bnx2x *bp,
+				struct bnx2x_virtf *vf,
+				struct bnx2x_vfop_cmd *cmd,
+				int qid)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+	if (vfop) {
+		vf->op_params.qctor.qstate.q_obj = &bnx2x_vfq(vf, qid, sp_obj);
+
+		vfop->args.qctor.qid = qid;
+		vfop->args.qctor.sb_idx = bnx2x_vfq(vf, qid, sb_idx);
+
+		bnx2x_vfop_opset(BNX2X_VFOP_QCTOR_INIT,
+				 bnx2x_vfop_qctor, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_qctor,
+					     cmd->block);
+	}
+	return -ENOMEM;
 }
 
 static void __devinit
@@ -110,221 +336,359 @@ bnx2x_vf_set_igu_info(struct bnx2x *bp, u8 igu_sb_id, u8 abs_vfid)
 		if (!vf_sb_count(vf))
 			vf->igu_base_id = igu_sb_id;
 		++vf_sb_count(vf);
-	 }
+	}
 }
 
-static void __devinit
-bnx2x_get_vf_igu_cam_info(struct bnx2x *bp)
+/* VFOP MAC/VLAN helpers */
+static inline void bnx2x_vfop_credit(struct bnx2x *bp,
+				     struct bnx2x_vfop *vfop,
+				     struct bnx2x_vlan_mac_obj *obj)
 {
-	int sb_id;
-	u32 val;
-	u8 fid;
+	struct bnx2x_vfop_args_filters *args = &vfop->args.filters;
 
-	/* IGU in normal mode - read CAM */
-	for (sb_id = 0; sb_id < IGU_REG_MAPPING_MEMORY_SIZE; sb_id++) {
-		val = REG_RD(bp, IGU_REG_MAPPING_MEMORY + sb_id * 4);
-		if (!(val & IGU_REG_MAPPING_MEMORY_VALID))
-			continue;
-		fid = GET_FIELD((val), IGU_REG_MAPPING_MEMORY_FID);
-		if (!(fid & IGU_FID_ENCODE_IS_PF))
-			bnx2x_vf_set_igu_info(bp, sb_id,
-					      (fid & IGU_FID_VF_NUM_MASK));
+	/* update credit only if there is no error
+	 * and a valid credit counter
+	 */
+	if (!vfop->rc && args->credit) {
+		int cnt = 0;
+		struct list_head *pos;
 
-		DP(BNX2X_MSG_IOV, "%s[%d], igu_sb_id=%d, msix=%d\n",
-		   ((fid & IGU_FID_ENCODE_IS_PF) ? "PF" : "VF"),
-		   ((fid & IGU_FID_ENCODE_IS_PF) ? (fid & IGU_FID_PF_NUM_MASK) :
-		   (fid & IGU_FID_VF_NUM_MASK)), sb_id,
-		   GET_FIELD((val), IGU_REG_MAPPING_MEMORY_VECTOR));
+		list_for_each(pos, &obj->head)
+			cnt++;
+
+		atomic_set(args->credit, cnt);
 	}
 }
 
-static void __bnx2x_iov_free_vfdb(struct bnx2x *bp)
+static int bnx2x_vfop_set_user_req(struct bnx2x *bp,
+				    struct bnx2x_vfop_filter *pos,
+				    struct bnx2x_vlan_mac_data *user_req)
 {
-	if (bp->vfdb) {
-		kfree(bp->vfdb->vfqs);
-		kfree(bp->vfdb->vfs);
-		kfree(bp->vfdb);
+	user_req->cmd = pos->add ? BNX2X_VLAN_MAC_ADD :
+		BNX2X_VLAN_MAC_DEL;
+
+	switch (pos->type) {
+	case BNX2X_VFOP_FILTER_MAC:
+		memcpy(user_req->u.mac.mac, pos->mac, ETH_ALEN);
+		break;
+	case BNX2X_VFOP_FILTER_VLAN:
+		user_req->u.vlan.vlan = pos->vid;
+		break;
+	default:
+		BNX2X_ERR("Invalid filter type, skipping\n");
+		return 1;
 	}
-	bp->vfdb = NULL;
+	return 0;
 }
 
-static int bnx2x_sriov_pci_cfg_info(struct bnx2x *bp, struct bnx2x_sriov *iov)
+static int
+bnx2x_vfop_config_vlan0(struct bnx2x *bp,
+			struct bnx2x_vlan_mac_ramrod_params *vlan_mac,
+			bool add)
 {
-	int pos;
-	struct pci_dev *dev = bp->pdev;
-
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
-	if (!pos) {
-		BNX2X_ERR("failed to find SRIOV capability in device\n");
-		return -ENODEV;
-	}
+	int rc;
 
-	iov->pos = pos;
-	DP(BNX2X_MSG_IOV, "sriov ext pos %d\n", pos);
-	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &iov->ctrl);
-	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &iov->total);
-	pci_read_config_word(dev, pos + PCI_SRIOV_INITIAL_VF, &iov->initial);
-	pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
-	pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
-	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &iov->pgsz);
-	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
-	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
+	vlan_mac->user_req.cmd = add ? BNX2X_VLAN_MAC_ADD :
+		BNX2X_VLAN_MAC_DEL;
+	vlan_mac->user_req.u.vlan.vlan = 0;
 
-	return 0;
+	rc = bnx2x_config_vlan_mac(bp, vlan_mac);
+	if (rc == -EEXIST)
+		rc = 0;
+	return rc;
 }
 
-static int bnx2x_sriov_info(struct bnx2x *bp, struct bnx2x_sriov *iov)
+static int bnx2x_vfop_config_list(struct bnx2x *bp,
+				  struct bnx2x_vfop_filters *filters,
+				  struct bnx2x_vlan_mac_ramrod_params *vlan_mac)
 {
-	u32 val;
-
-	/* read the SRIOV capability structure
-	 * The fields can be read via configuration read or
-	 * directly from the device (starting at offset PCICFG_OFFSET)
-	 */
-	if (bnx2x_sriov_pci_cfg_info(bp, iov))
-		return -ENODEV;
+	struct bnx2x_vfop_filter *pos, *tmp;
+	struct list_head rollback_list, *filters_list = &filters->head;
+	struct bnx2x_vlan_mac_data *user_req = &vlan_mac->user_req;
+	int rc = 0, cnt = 0;
 
-	/* get the number of SRIOV bars */
-	iov->nres = 0;
+	INIT_LIST_HEAD(&rollback_list);
 
-	/* read the first_vfid */
-	val = REG_RD(bp, PCICFG_OFFSET + GRC_CONFIG_REG_PF_INIT_VF);
-	iov->first_vf_in_pf = ((val & GRC_CR_PF_INIT_VF_PF_FIRST_VF_NUM_MASK)
-			       * 8) - (BNX2X_MAX_NUM_OF_VFS * BP_PATH(bp));
+	list_for_each_entry_safe(pos, tmp, filters_list, link) {
+		if (bnx2x_vfop_set_user_req(bp, pos, user_req))
+			continue;
 
-	DP(BNX2X_MSG_IOV,
-	   "IOV info[%d]: first vf %d, nres %d, cap 0x%x, ctrl 0x%x, total %d, initial %d, num vfs %d, offset %d, stride %d, page size 0x%x\n",
-	   BP_FUNC(bp),
-	   iov->first_vf_in_pf, iov->nres, iov->cap, iov->ctrl, iov->total,
-	   iov->initial, iov->nr_virtfn, iov->offset, iov->stride, iov->pgsz);
+		rc = bnx2x_config_vlan_mac(bp, vlan_mac);
+		if (rc >= 0) {
+			cnt += pos->add ? 1 : -1;
+			list_del(&pos->link);
+			list_add(&pos->link, &rollback_list);
+			rc = 0;
+		} else if (rc == -EEXIST) {
+			rc = 0;
+		} else {
+			BNX2X_ERR("Failed to add a new vlan_mac command\n");
+			break;
+		}
+	}
 
-	return 0;
+	/* rollback if error or too many rules added */
+	if (rc || cnt > filters->add_cnt) {
+		BNX2X_ERR("error or too many rules added. Performing rollback\n");
+		list_for_each_entry_safe(pos, tmp, &rollback_list, link) {
+			pos->add = !pos->add;	/* reverse op */
+			bnx2x_vfop_set_user_req(bp, pos, user_req);
+			bnx2x_config_vlan_mac(bp, vlan_mac);
+			list_del(&pos->link);
+		}
+		cnt = 0;
+		if (!rc)
+			rc = -EINVAL;
+	}
+	filters->add_cnt = cnt;
+	return rc;
 }
 
-static u8 bnx2x_iov_get_max_queue_count(struct bnx2x *bp)
+/* VFOP set VLAN/MAC */
+static void bnx2x_vfop_vlan_mac(struct bnx2x *bp, struct bnx2x_virtf *vf)
 {
-	int i;
-	u8 queue_count = 0;
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	struct bnx2x_vlan_mac_ramrod_params *vlan_mac = &vfop->op_p->vlan_mac;
+	struct bnx2x_vlan_mac_obj *obj = vlan_mac->vlan_mac_obj;
+	struct bnx2x_vfop_filters *filters = vfop->args.filters.multi_filter;
 
-	if (IS_SRIOV(bp))
-		for_each_vf(bp, i)
-			queue_count += bnx2x_vf(bp, i, alloc_resc.num_sbs);
+	enum bnx2x_vfop_vlan_mac_state state = vfop->state;
 
-	return queue_count;
-}
+	if (vfop->rc < 0)
+		goto op_err;
 
-/* must be called after PF bars are mapped */
-int __devinit bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param,
-				 int num_vfs_param)
-{
-	int err, i, qcount;
-	struct bnx2x_sriov *iov;
-	struct pci_dev *dev = bp->pdev;
+	DP(BNX2X_MSG_IOV, "vf[%d] STATE: %d\n", vf->abs_vfid, state);
 
-	bp->vfdb = NULL;
+	bnx2x_vfop_reset_wq(vf);
 
-	/* verify sriov capability is present in configuration space */
-	if (!pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV))
-		return 0;
+	switch (state) {
+	case BNX2X_VFOP_VLAN_MAC_CLEAR:
+		/* next state */
+		vfop->state = BNX2X_VFOP_VLAN_MAC_CHK_DONE;
 
-	/* verify is pf */
-	if (IS_VF(bp))
-		return 0;
+		/* do delete */
+		vfop->rc = obj->delete_all(bp, obj,
+					   &vlan_mac->user_req.vlan_mac_flags,
+					   &vlan_mac->ramrod_flags);
 
-	/* verify chip revision */
-	if (CHIP_IS_E1x(bp))
-		return 0;
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE);
 
-	/* check if SRIOV support is turned off */
-	if (!num_vfs_param)
-		return 0;
+	case BNX2X_VFOP_VLAN_MAC_CONFIG_SINGLE:
+		/* next state */
+		vfop->state = BNX2X_VFOP_VLAN_MAC_CHK_DONE;
 
-	/* SRIOV assumes that num of PF CIDs < BNX2X_FIRST_VF_CID */
-	if (BNX2X_L2_MAX_CID(bp) >= BNX2X_FIRST_VF_CID) {
-		BNX2X_ERR("PF cids %d are overspilling into vf space (starts at %d). Abort SRIOV",
-			  BNX2X_L2_MAX_CID(bp), BNX2X_FIRST_VF_CID);
-		return 0;
-	}
+		/* do config */
+		vfop->rc = bnx2x_config_vlan_mac(bp, vlan_mac);
+		if (vfop->rc == -EEXIST)
+			vfop->rc = 0;
 
-	/* SRIOV can be enabled only with MSIX */
-	if (int_mode_param == BNX2X_INT_MODE_MSI ||
-	    int_mode_param == BNX2X_INT_MODE_INTX)
-		BNX2X_ERR("Forced MSI/INTx mode is incompatible with SRIOV\n");
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE);
 
-	err = -EIO;
-	/* verify ari is enabled */
-	if (!bnx2x_ari_enabled(bp->pdev)) {
-		BNX2X_ERR("ARI not supported, SRIOV can not be enabled\n");
-		return err;
-	}
+	case BNX2X_VFOP_VLAN_MAC_CHK_DONE:
+		vfop->rc = !!obj->raw.check_pending(&obj->raw);
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE);
 
-	/* verify igu is in normal mode */
-	if (CHIP_INT_MODE_IS_BC(bp)) {
-		BNX2X_ERR("IGU not normal mode,  SRIOV can not be enabled\n");
-		return err;
-	}
+	case BNX2X_VFOP_MAC_CONFIG_LIST:
+		/* next state */
+		vfop->state = BNX2X_VFOP_VLAN_MAC_CHK_DONE;
 
-	/* allocate the vfs database */
-	bp->vfdb = kzalloc(sizeof(*(bp->vfdb)), GFP_KERNEL);
-	if (!bp->vfdb) {
-		BNX2X_ERR("failed to allocate vf database\n");
-		err = -ENOMEM;
-		goto failed;
+		/* do list config */
+		vfop->rc = bnx2x_vfop_config_list(bp, filters, vlan_mac);
+		if (vfop->rc)
+			goto op_err;
+
+		set_bit(RAMROD_CONT, &vlan_mac->ramrod_flags);
+		vfop->rc = bnx2x_config_vlan_mac(bp, vlan_mac);
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE);
+
+	case BNX2X_VFOP_VLAN_CONFIG_LIST:
+		/* next state */
+		vfop->state = BNX2X_VFOP_VLAN_CONFIG_LIST_0;
+
+		/* remove vlan0 - could be no-op */
+		vfop->rc = bnx2x_vfop_config_vlan0(bp, vlan_mac, false);
+		if (vfop->rc)
+			goto op_err;
+
+		/* Do vlan list config. if this operation fails we try to
+		 * restore vlan0 to keep the queue is working order
+		 */
+		vfop->rc = bnx2x_vfop_config_list(bp, filters, vlan_mac);
+		if (!vfop->rc) {
+			set_bit(RAMROD_CONT, &vlan_mac->ramrod_flags);
+			vfop->rc = bnx2x_config_vlan_mac(bp, vlan_mac);
+		}
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_CONT); /* fall-through */
+
+	case BNX2X_VFOP_VLAN_CONFIG_LIST_0:
+		/* next state */
+		vfop->state = BNX2X_VFOP_VLAN_MAC_CHK_DONE;
+
+		if (list_empty(&obj->head))
+			/* add vlan0 */
+			vfop->rc = bnx2x_vfop_config_vlan0(bp, vlan_mac, true);
+		bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE);
+
+	default:
+		bnx2x_vfop_default(state);
 	}
+op_err:
+	BNX2X_ERR("VLAN-MAC error: rc %d\n", vfop->rc);
+op_done:
+	kfree(filters);
+	bnx2x_vfop_credit(bp, vfop, obj);
+	bnx2x_vfop_end(bp, vf, vfop);
+op_pending:
+	return;
+}
 
-	/* get the sriov info - Linux already collected all the pertinent
-	 * information, however the sriov structure is for the private use
-	 * of the pci module. Also we want this information regardless
-	 *  of the hyper-visor.
-	 */
-	iov = &(bp->vfdb->sriov);
-	err = bnx2x_sriov_info(bp, iov);
-	if (err)
-		goto failed;
+struct bnx2x_vfop_vlan_mac_flags {
+	bool drv_only;
+	bool dont_consume;
+	bool single_cmd;
+	bool add;
+};
 
-	/* calcuate the actual number of VFs */
-	iov->nr_virtfn = min_t(u16, iov->total, (u16)num_vfs_param);
+static void
+bnx2x_vfop_vlan_mac_prep_ramrod(struct bnx2x_vlan_mac_ramrod_params *ramrod,
+				struct bnx2x_vfop_vlan_mac_flags *flags)
+{
+	struct bnx2x_vlan_mac_data *ureq = &ramrod->user_req;
 
-	/* allcate the vf array */
-	bp->vfdb->vfs = kzalloc(sizeof(struct bnx2x_virtf) *
-				BNX2X_NR_VIRTFN(bp), GFP_KERNEL);
-	if (!bp->vfdb->vfs) {
-		BNX2X_ERR("failed to allocate vf array\n");
-		err = -ENOMEM;
-		goto failed;
+	memset(ramrod, 0, sizeof(*ramrod));
+
+	/* ramrod flags */
+	if (flags->drv_only)
+		set_bit(RAMROD_DRV_CLR_ONLY, &ramrod->ramrod_flags);
+	if (flags->single_cmd)
+		set_bit(RAMROD_EXEC, &ramrod->ramrod_flags);
+
+	/* mac_vlan flags */
+	if (flags->dont_consume)
+		set_bit(BNX2X_DONT_CONSUME_CAM_CREDIT, &ureq->vlan_mac_flags);
+
+	/* cmd */
+	ureq->cmd = flags->add ? BNX2X_VLAN_MAC_ADD : BNX2X_VLAN_MAC_DEL;
+}
+
+
+int bnx2x_vfop_vlan_set_cmd(struct bnx2x *bp,
+			    struct bnx2x_virtf *vf,
+			    struct bnx2x_vfop_cmd *cmd,
+			    int qid, u16 vid, bool add)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+
+	if (vfop) {
+		struct bnx2x_vfop_args_filters filters = {
+			.multi_filter = NULL, /* single command */
+			.credit = &bnx2x_vfq(vf, qid, vlan_count),
+		};
+		struct bnx2x_vfop_vlan_mac_flags flags = {
+			.drv_only = false,
+			.dont_consume = (filters.credit != NULL),
+			.single_cmd = true,
+			.add = add,
+		};
+		struct bnx2x_vlan_mac_ramrod_params *ramrod =
+			&vf->op_params.vlan_mac;
+
+		/* set ramrod params */
+		bnx2x_vfop_vlan_mac_prep_ramrod(ramrod, &flags);
+		ramrod->user_req.u.vlan.vlan = vid;
+
+		/* set object */
+		ramrod->vlan_mac_obj = &bnx2x_vfq(vf, qid, vlan_obj);
+
+		/* set extra args */
+		vfop->args.filters = filters;
+
+		bnx2x_vfop_opset(BNX2X_VFOP_VLAN_MAC_CONFIG_SINGLE,
+				 bnx2x_vfop_vlan_mac, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_vlan_mac,
+					     cmd->block);
 	}
+	return -ENOMEM;
+}
 
-	/* Initial VF init - index and abs_vfid - nr_virtfn must be set */
-	for_each_vf(bp, i) {
-		bnx2x_vf(bp, i, index) = i;
-		bnx2x_vf(bp, i, abs_vfid) = iov->first_vf_in_pf + i;
-		bnx2x_vf(bp, i, state) = VF_FREE;
-		INIT_LIST_HEAD(&bnx2x_vf(bp, i, op_list_head));
-		mutex_init(&bnx2x_vf(bp, i, op_mutex));
-		bnx2x_vf(bp, i, op_current) = CHANNEL_TLV_NONE;
+/* VFOP queue setup (queue constructor + set vlan 0) */
+static void bnx2x_vfop_qsetup(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	int qid = vfop->args.qctor.qid;
+	enum bnx2x_vfop_qsetup_state state = vfop->state;
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vfop_qsetup,
+		.block = false,
+	};
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "vf[%d] STATE: %d\n", vf->abs_vfid, state);
+
+	switch (state) {
+	case BNX2X_VFOP_QSETUP_CTOR:
+		/* init the queue ctor command */
+		vfop->state = BNX2X_VFOP_QSETUP_VLAN0;
+		vfop->rc = bnx2x_vfop_qctor_cmd(bp, vf, &cmd, qid);
+		if (vfop->rc)
+			goto op_err;
+		return;
+
+	case BNX2X_VFOP_QSETUP_VLAN0:
+		/* skip if non-leading or FPGA/EMU*/
+		if (qid || CHIP_REV_IS_SLOW(bp))
+			goto op_done;
+
+		/* init the queue set-vlan command (for vlan 0) */
+		vfop->state = BNX2X_VFOP_QSETUP_DONE;
+		vfop->rc = bnx2x_vfop_vlan_set_cmd(bp, vf, &cmd, qid, 0, true);
+		if (vfop->rc)
+			goto op_err;
+		return;
+op_err:
+	BNX2X_ERR("QSETUP[%d:%d] error: rc %d\n", vf->abs_vfid, qid, vfop->rc);
+op_done:
+	case BNX2X_VFOP_QSETUP_DONE:
+		bnx2x_vfop_end(bp, vf, vfop);
+		return;
+	default:
+		bnx2x_vfop_default(state);
 	}
+}
 
-	/* re-read the IGU CAM for VFs - index and abs_vfid must be set */
-	bnx2x_get_vf_igu_cam_info(bp);
+int bnx2x_vfop_qsetup_cmd(struct bnx2x *bp,
+			  struct bnx2x_virtf *vf,
+			  struct bnx2x_vfop_cmd *cmd,
+			  int qid)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
 
-	/* get the total queue count and allocate the global queue arrays */
-	qcount = bnx2x_iov_get_max_queue_count(bp);
+	if (vfop) {
+		vfop->args.qctor.qid = qid;
 
-	/* allocate the queue arrays for all VFs */
-	bp->vfdb->vfqs = kzalloc(qcount * sizeof(struct bnx2x_vf_queue),
-				 GFP_KERNEL);
-	if (!bp->vfdb->vfqs) {
-		BNX2X_ERR("failed to allocate vf queue array\n");
-		err = -ENOMEM;
-		goto failed;
+		bnx2x_vfop_opset(BNX2X_VFOP_QSETUP_CTOR,
+				 bnx2x_vfop_qsetup, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_qsetup,
+					     cmd->block);
 	}
+	return -ENOMEM;
+}
 
-	return 0;
-failed:
-	DP(BNX2X_MSG_IOV, "Failed err=%d\n", err);
-	__bnx2x_iov_free_vfdb(bp);
-	return err;
+
+static u8 bnx2x_iov_get_max_queue_count(struct bnx2x *bp)
+{
+	int i;
+	u8 queue_count = 0;
+
+	if (IS_SRIOV(bp))
+		for_each_vf(bp, i)
+			queue_count += bnx2x_vf(bp, i, alloc_resc.num_sbs);
+
+	return queue_count;
 }
+
 /* VF enable primitives
  *
  * when pretend is required the caller is responsible
@@ -600,6 +964,209 @@ static void bnx2x_vf_set_bars(struct bnx2x *bp, struct bnx2x_virtf *vf)
 	}
 }
 
+static int bnx2x_ari_enabled(struct pci_dev *dev)
+{
+	return dev->bus->self && dev->bus->self->ari_enabled;
+}
+
+static void __devinit
+bnx2x_get_vf_igu_cam_info(struct bnx2x *bp)
+{
+	int sb_id;
+	u32 val;
+	u8 fid;
+
+	/* IGU in normal mode - read CAM */
+	for (sb_id = 0; sb_id < IGU_REG_MAPPING_MEMORY_SIZE; sb_id++) {
+		val = REG_RD(bp, IGU_REG_MAPPING_MEMORY + sb_id * 4);
+		if (!(val & IGU_REG_MAPPING_MEMORY_VALID))
+			continue;
+		fid = GET_FIELD((val), IGU_REG_MAPPING_MEMORY_FID);
+		if (!(fid & IGU_FID_ENCODE_IS_PF))
+			bnx2x_vf_set_igu_info(bp, sb_id,
+					      (fid & IGU_FID_VF_NUM_MASK));
+
+		DP(BNX2X_MSG_IOV, "%s[%d], igu_sb_id=%d, msix=%d\n",
+		   ((fid & IGU_FID_ENCODE_IS_PF) ? "PF" : "VF"),
+		   ((fid & IGU_FID_ENCODE_IS_PF) ? (fid & IGU_FID_PF_NUM_MASK) :
+		   (fid & IGU_FID_VF_NUM_MASK)), sb_id,
+		   GET_FIELD((val), IGU_REG_MAPPING_MEMORY_VECTOR));
+	}
+}
+
+static void __bnx2x_iov_free_vfdb(struct bnx2x *bp)
+{
+	if (bp->vfdb) {
+		kfree(bp->vfdb->vfqs);
+		kfree(bp->vfdb->vfs);
+		kfree(bp->vfdb);
+	}
+	bp->vfdb = NULL;
+}
+
+static int bnx2x_sriov_pci_cfg_info(struct bnx2x *bp, struct bnx2x_sriov *iov)
+{
+	int pos;
+	struct pci_dev *dev = bp->pdev;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+	if (!pos) {
+		BNX2X_ERR("failed to find SRIOV capability in device\n");
+		return -ENODEV;
+	}
+
+	iov->pos = pos;
+	DP(BNX2X_MSG_IOV, "sriov ext pos %d\n", pos);
+	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &iov->ctrl);
+	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &iov->total);
+	pci_read_config_word(dev, pos + PCI_SRIOV_INITIAL_VF, &iov->initial);
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
+	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &iov->pgsz);
+	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
+	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
+
+	return 0;
+}
+static int bnx2x_sriov_info(struct bnx2x *bp, struct bnx2x_sriov *iov)
+{
+	u32 val;
+
+	/* read the SRIOV capability structure
+	 * The fields can be read via configuration read or
+	 * directly from the device (starting at offset PCICFG_OFFSET)
+	 */
+	if (bnx2x_sriov_pci_cfg_info(bp, iov))
+		return -ENODEV;
+
+	/* get the number of SRIOV bars */
+	iov->nres = 0;
+
+	/* read the first_vfid */
+	val = REG_RD(bp, PCICFG_OFFSET + GRC_CONFIG_REG_PF_INIT_VF);
+	iov->first_vf_in_pf = ((val & GRC_CR_PF_INIT_VF_PF_FIRST_VF_NUM_MASK)
+			       * 8) - (BNX2X_MAX_NUM_OF_VFS * BP_PATH(bp));
+
+	DP(BNX2X_MSG_IOV,
+	   "IOV info[%d]: first vf %d, nres %d, cap 0x%x, ctrl 0x%x, total %d, initial %d, num vfs %d, offset %d, stride %d, page size 0x%x\n",
+	   BP_FUNC(bp),
+	   iov->first_vf_in_pf, iov->nres, iov->cap, iov->ctrl, iov->total,
+	   iov->initial, iov->nr_virtfn, iov->offset, iov->stride, iov->pgsz);
+
+	return 0;
+}
+/* must be called after PF bars are mapped */
+int __devinit bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param,
+				 int num_vfs_param)
+{
+	int err, i, qcount;
+	struct bnx2x_sriov *iov;
+	struct pci_dev *dev = bp->pdev;
+
+	bp->vfdb = NULL;
+
+	/* verify is pf */
+	if (IS_VF(bp))
+		return 0;
+
+	/* verify sriov capability is present in configuration space */
+	if (!pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV))
+		return 0;
+
+	/* verify chip revision */
+	if (CHIP_IS_E1x(bp))
+		return 0;
+
+	/* check if SRIOV support is turned off */
+	if (!num_vfs_param)
+		return 0;
+
+	/* SRIOV assumes that num of PF CIDs < BNX2X_FIRST_VF_CID */
+	if (BNX2X_L2_MAX_CID(bp) >= BNX2X_FIRST_VF_CID) {
+		BNX2X_ERR("PF cids %d are overspilling into vf space (starts at %d). Abort SRIOV",
+			  BNX2X_L2_MAX_CID(bp), BNX2X_FIRST_VF_CID);
+		return 0;
+	}
+
+	/* SRIOV can be enabled only with MSIX */
+	if (int_mode_param == BNX2X_INT_MODE_MSI ||
+	    int_mode_param == BNX2X_INT_MODE_INTX)
+		BNX2X_ERR("Forced MSI/INTx mode is incompatible with SRIOV\n");
+
+	err = -EIO;
+	/* verify ari is enabled */
+	if (!bnx2x_ari_enabled(bp->pdev)) {
+		BNX2X_ERR("ARI not supported, SRIOV can not be enabled\n");
+		return err;
+	}
+
+	/* verify igu is in normal mode */
+	if (CHIP_INT_MODE_IS_BC(bp)) {
+		BNX2X_ERR("IGU not normal mode,  SRIOV can not be enabled\n");
+		return err;
+	}
+
+	/* allocate the vfs database */
+	bp->vfdb = kzalloc(sizeof(*(bp->vfdb)), GFP_KERNEL);
+	if (!bp->vfdb) {
+		BNX2X_ERR("failed to allocate vf database\n");
+		err = -ENOMEM;
+		goto failed;
+	}
+
+	/* get the sriov info - Linux already collected all the pertinent
+	 * information, however the sriov structure is for the private use
+	 * of the pci module. Also we want this information regardless
+	 * of the hyper-visor.
+	 */
+	iov = &(bp->vfdb->sriov);
+	err = bnx2x_sriov_info(bp, iov);
+	if (err)
+		goto failed;
+
+	/* calculate the actual number of VFs */
+	iov->nr_virtfn = min_t(u16, iov->total, (u16)num_vfs_param);
+
+	/* allocate the vf array */
+	bp->vfdb->vfs = kzalloc(sizeof(struct bnx2x_virtf) *
+				BNX2X_NR_VIRTFN(bp), GFP_KERNEL);
+	if (!bp->vfdb->vfs) {
+		BNX2X_ERR("failed to allocate vf array\n");
+		err = -ENOMEM;
+		goto failed;
+	}
+
+	/* Initial VF init - index and abs_vfid - nr_virtfn must be set */
+	for_each_vf(bp, i) {
+		bnx2x_vf(bp, i, index) = i;
+		bnx2x_vf(bp, i, abs_vfid) = iov->first_vf_in_pf + i;
+		bnx2x_vf(bp, i, state) = VF_FREE;
+		INIT_LIST_HEAD(&bnx2x_vf(bp, i, op_list_head));
+		mutex_init(&bnx2x_vf(bp, i, op_mutex));
+		bnx2x_vf(bp, i, op_current) = CHANNEL_TLV_NONE;
+	}
+
+	/* re-read the IGU CAM for VFs - index and abs_vfid must be set */
+	bnx2x_get_vf_igu_cam_info(bp);
+
+	/* get the total queue count and allocate the global queue arrays */
+	qcount = bnx2x_iov_get_max_queue_count(bp);
+
+	/* allocate the queue arrays for all VFs */
+	bp->vfdb->vfqs = kzalloc(qcount * sizeof(struct bnx2x_vf_queue),
+				 GFP_KERNEL);
+	if (!bp->vfdb->vfqs) {
+		BNX2X_ERR("failed to allocate vf queue array\n");
+		err = -ENOMEM;
+		goto failed;
+	}
+
+	return 0;
+failed:
+	DP(BNX2X_MSG_IOV, "Failed err=%d\n", err);
+	__bnx2x_iov_free_vfdb(bp);
+	return err;
+}
 void __devexit bnx2x_iov_remove_one(struct bnx2x *bp)
 {
 	/* if SRIOV is not enabled there's nothing to do */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 698ff71..059715c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -26,6 +26,8 @@
  * The VF array is indexed by the relative vfid.
  */
 #define BNX2X_VF_MAX_QUEUES		16
+#define BNX2X_VF_MAX_TPA_AGG_QUEUES	8
+
 struct bnx2x_sriov {
 	u32 first_vf_in_pf;
 
@@ -90,6 +92,11 @@ struct bnx2x_virtf;
 /* VFOP definitions */
 typedef void (*vfop_handler_t)(struct bnx2x *bp, struct bnx2x_virtf *vf);
 
+struct bnx2x_vfop_cmd {
+	vfop_handler_t done;
+	bool block;
+};
+
 /* VFOP queue filters command additional arguments */
 struct bnx2x_vfop_filter {
 	struct list_head link;
@@ -407,6 +414,10 @@ static u8 vfq_cl_id(struct bnx2x_virtf *vf, struct bnx2x_vf_queue *q)
 {
 	return vf->igu_base_id + q->index;
 }
+static inline u8 vfq_stat_id(struct bnx2x_virtf *vf, struct bnx2x_vf_queue *q)
+{
+	return vfq_cl_id(vf, q);
+}
 static inline u8 vfq_qzone_id(struct bnx2x_virtf *vf, struct bnx2x_vf_queue *q)
 {
 	return vfq_cl_id(vf, q);
@@ -440,6 +451,44 @@ int bnx2x_vf_init(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		  dma_addr_t *sb_map);
 
 
+/* VFOP generic helpers */
+#define bnx2x_vfop_default(state) do {				\
+		BNX2X_ERR("Bad state %d\n", (state));		\
+		vfop->rc = -EINVAL;				\
+		goto op_err;					\
+	} while (0)
+
+enum {
+	VFOP_DONE,
+	VFOP_CONT,
+	VFOP_VERIFY_PEND,
+};
+
+#define bnx2x_vfop_finalize(vf, rc, next) do {				\
+		if ((rc) < 0)						\
+			goto op_err;					\
+		else if ((rc) > 0)					\
+			goto op_pending;				\
+		else if ((next) == VFOP_DONE)				\
+			goto op_done;					\
+		else if ((next) == VFOP_VERIFY_PEND)			\
+			BNX2X_ERR("expected pending");			\
+		else {							\
+			DP(BNX2X_MSG_IOV, "no ramrod. scheduling");	\
+			atomic_set(&vf->op_in_progress, 1);		\
+			queue_delayed_work(bnx2x_wq, &bp->sp_task, 0);  \
+			return;						\
+		}							\
+	} while (0)
+
+#define bnx2x_vfop_opset(first_state, trans_hndlr, done_hndlr)		\
+	do {								\
+		vfop->state = first_state;				\
+		vfop->op_p = &vf->op_params;				\
+		vfop->transition = trans_hndlr;				\
+		vfop->done = done_hndlr;				\
+	} while (0)
+
 static inline struct bnx2x_vfop *bnx2x_vfop_cur(struct bnx2x *bp,
 						struct bnx2x_virtf *vf)
 {
@@ -448,6 +497,133 @@ static inline struct bnx2x_vfop *bnx2x_vfop_cur(struct bnx2x *bp,
 	return list_first_entry(&vf->op_list_head, struct bnx2x_vfop, link);
 }
 
+static inline struct bnx2x_vfop *bnx2x_vfop_add(struct bnx2x *bp,
+						struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = kzalloc(sizeof(*vfop), GFP_KERNEL);
+
+	WARN(!mutex_is_locked(&vf->op_mutex), "about to access vf op linked list but mutex was not locked!");
+	if (vfop) {
+		INIT_LIST_HEAD(&vfop->link);
+		list_add(&vfop->link, &vf->op_list_head);
+	}
+	return vfop;
+}
+
+static inline void bnx2x_vfop_end(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				  struct bnx2x_vfop *vfop)
+{
+	/* rc < 0 - error, otherwise set to 0 */
+	DP(BNX2X_MSG_IOV, "rc was %d\n", vfop->rc);
+	if (vfop->rc >= 0)
+		vfop->rc = 0;
+	DP(BNX2X_MSG_IOV, "rc is now %d\n", vfop->rc);
+
+	/* unlink the current op context and propagate error code
+	 * must be done before invoking the 'done()' handler
+	 */
+	WARN(!mutex_is_locked(&vf->op_mutex), "about to access vf op linked list but mutex was not locked!");
+	list_del(&vfop->link);
+
+	if (list_empty(&vf->op_list_head)) {
+		DP(BNX2X_MSG_IOV, "list was empty %d\n", vfop->rc);
+		vf->op_rc = vfop->rc;
+		DP(BNX2X_MSG_IOV, "copying rc vf->op_rc %d,  vfop->rc %d\n",
+		   vf->op_rc, vfop->rc);
+	} else {
+		struct bnx2x_vfop *cur_vfop;
+		DP(BNX2X_MSG_IOV, "list not empty %d\n", vfop->rc);
+		cur_vfop = bnx2x_vfop_cur(bp, vf);
+		cur_vfop->rc = vfop->rc;
+		DP(BNX2X_MSG_IOV, "copying rc vf->op_rc %d, vfop->rc %d\n",
+		   vf->op_rc, vfop->rc);
+	}
+
+	/* invoke done handler */
+	if (vfop->done) {
+		DP(BNX2X_MSG_IOV, "calling done handler\n");
+		vfop->done(bp, vf);
+	}
+
+	DP(BNX2X_MSG_IOV, "done handler complete. vf->op_rc %d, vfop->rc %d\n",
+	   vf->op_rc, vfop->rc);
+
+	/* if this is the last nested op reset the wait_blocking flag
+	 * to release any blocking wrappers, only after 'done()' is invoked
+	 */
+	if (list_empty(&vf->op_list_head)) {
+		DP(BNX2X_MSG_IOV, "list was empty after done %d\n", vfop->rc);
+		vf->op_wait_blocking = false;
+	}
+
+	kfree(vfop);
+}
+
+static inline int bnx2x_vfop_wait_blocking(struct bnx2x *bp,
+					   struct bnx2x_virtf *vf)
+{
+	/* can take a while if any port is running */
+	int cnt = 5000;
+
+	if (CHIP_REV_IS_EMUL(bp))
+		cnt *= 20;
+
+	might_sleep();
+	while (cnt--) {
+		if (vf->op_wait_blocking == false) {
+#ifdef BNX2X_STOP_ON_ERROR
+			DP(BNX2X_MSG_IOV, "exit  (cnt %d)\n", 5000 - cnt);
+#endif
+			return 0;
+		}
+		usleep_range(1000, 2000);
+
+		if (bp->panic)
+			return -EIO;
+	}
+
+	/* timeout! */
+#ifdef BNX2X_STOP_ON_ERROR
+	bnx2x_panic();
+#endif
+
+	return -EBUSY;
+}
+
+static inline int bnx2x_vfop_transition(struct bnx2x *bp,
+					struct bnx2x_virtf *vf,
+					vfop_handler_t transition,
+					bool block)
+{
+	if (block)
+		vf->op_wait_blocking = true;
+	transition(bp, vf);
+	if (block)
+		return bnx2x_vfop_wait_blocking(bp, vf);
+	return 0;
+}
+
+/* VFOP queue construction helpers */
+void bnx2x_vfop_qctor_dump_tx(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			    struct bnx2x_queue_init_params *init_params,
+			    struct bnx2x_queue_setup_params *setup_params,
+			    u16 q_idx, u16 sb_idx);
+
+void bnx2x_vfop_qctor_dump_rx(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			    struct bnx2x_queue_init_params *init_params,
+			    struct bnx2x_queue_setup_params *setup_params,
+			    u16 q_idx, u16 sb_idx);
+
+void bnx2x_vfop_qctor_prep(struct bnx2x *bp,
+			   struct bnx2x_virtf *vf,
+			   struct bnx2x_vf_queue *q,
+			   struct bnx2x_vfop_qctor_params *p,
+			   unsigned long q_type);
+int bnx2x_vfop_qsetup_cmd(struct bnx2x *bp,
+			  struct bnx2x_virtf *vf,
+			  struct bnx2x_vfop_cmd *cmd,
+			  int qid);
+
 int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid);
 u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf);
 /* VF FLR helpers */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 1dfe734..c198d16 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -377,6 +377,148 @@ static void bnx2x_vf_mbx_init_vf(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	bnx2x_vf_mbx_resp(bp, vf);
 }
 
+/* convert MBX queue-flags to standard SP queue-flags */
+static void bnx2x_vf_mbx_set_q_flags(u32 mbx_q_flags,
+				     unsigned long *sp_q_flags)
+{
+	if (mbx_q_flags & VFPF_QUEUE_FLG_TPA)
+		__set_bit(BNX2X_Q_FLG_TPA, sp_q_flags);
+	if (mbx_q_flags & VFPF_QUEUE_FLG_TPA_IPV6)
+		__set_bit(BNX2X_Q_FLG_TPA_IPV6, sp_q_flags);
+	if (mbx_q_flags & VFPF_QUEUE_FLG_TPA_GRO)
+		__set_bit(BNX2X_Q_FLG_TPA_GRO, sp_q_flags);
+	if (mbx_q_flags & VFPF_QUEUE_FLG_STATS)
+		__set_bit(BNX2X_Q_FLG_STATS, sp_q_flags);
+	if (mbx_q_flags & VFPF_QUEUE_FLG_OV)
+		__set_bit(BNX2X_Q_FLG_OV, sp_q_flags);
+	if (mbx_q_flags & VFPF_QUEUE_FLG_VLAN)
+		__set_bit(BNX2X_Q_FLG_VLAN, sp_q_flags);
+	if (mbx_q_flags & VFPF_QUEUE_FLG_COS)
+		__set_bit(BNX2X_Q_FLG_COS, sp_q_flags);
+	if (mbx_q_flags & VFPF_QUEUE_FLG_HC)
+		__set_bit(BNX2X_Q_FLG_HC, sp_q_flags);
+	if (mbx_q_flags & VFPF_QUEUE_FLG_DHC)
+		__set_bit(BNX2X_Q_FLG_DHC, sp_q_flags);
+}
+
+static void bnx2x_vf_mbx_setup_q(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				 struct bnx2x_vf_mbx *mbx)
+{
+	struct vfpf_setup_q_tlv *setup_q = &mbx->msg->req.setup_q;
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vf_mbx_resp,
+		.block = false,
+	};
+
+	/* verify vf_qid */
+	if (setup_q->vf_qid >= vf_rxq_count(vf)) {
+		BNX2X_ERR("vf_qid %d invalid, max queue count is %d\n",
+			  setup_q->vf_qid, vf_rxq_count(vf));
+		vf->op_rc = -EINVAL;
+		goto response;
+	}
+
+	/* tx queues must be setup alongside rx queues thus if the rx queue
+	 * is not marked as valid there's nothing to do.
+	 */
+	if (setup_q->param_valid & (VFPF_RXQ_VALID|VFPF_TXQ_VALID)) {
+		struct bnx2x_vf_queue *q = vfq_get(vf, setup_q->vf_qid);
+		unsigned long q_type = 0;
+
+		struct bnx2x_queue_init_params *init_p;
+		struct bnx2x_queue_setup_params *setup_p;
+
+		/* reinit the VF operation context */
+		memset(&vf->op_params.qctor, 0 , sizeof(vf->op_params.qctor));
+		setup_p = &vf->op_params.qctor.prep_qsetup;
+		init_p =  &vf->op_params.qctor.qstate.params.init;
+
+		/* activate immediateely */
+		__set_bit(BNX2X_Q_FLG_ACTIVE, &setup_p->flags);
+
+		if (setup_q->param_valid & VFPF_TXQ_VALID) {
+			struct bnx2x_txq_setup_params *txq_params =
+				&setup_p->txq_params;
+
+			__set_bit(BNX2X_Q_TYPE_HAS_TX, &q_type);
+
+			/* save sb resource index */
+			q->sb_idx = setup_q->txq.vf_sb;
+
+			/* tx init */
+			init_p->tx.hc_rate = setup_q->txq.hc_rate;
+			init_p->tx.sb_cq_index = setup_q->txq.sb_index;
+
+			bnx2x_vf_mbx_set_q_flags(setup_q->txq.flags,
+						 &init_p->tx.flags);
+
+			/* tx setup - flags */
+			bnx2x_vf_mbx_set_q_flags(setup_q->txq.flags,
+						 &setup_p->flags);
+
+			/* tx setup - general, nothing */
+
+			/* tx setup - tx */
+			txq_params->dscr_map = setup_q->txq.txq_addr;
+			txq_params->sb_cq_index = setup_q->txq.sb_index;
+			txq_params->traffic_type = setup_q->txq.traffic_type;
+
+			bnx2x_vfop_qctor_dump_tx(bp, vf, init_p, setup_p,
+						 q->index, q->sb_idx);
+		}
+
+		if (setup_q->param_valid & VFPF_RXQ_VALID) {
+			struct bnx2x_rxq_setup_params *rxq_params =
+							&setup_p->rxq_params;
+
+			__set_bit(BNX2X_Q_TYPE_HAS_RX, &q_type);
+
+			/* Note: there is no support for different SBs
+			 * for TX and RX
+			 */
+			q->sb_idx = setup_q->rxq.vf_sb;
+
+			/* rx init */
+			init_p->rx.hc_rate = setup_q->rxq.hc_rate;
+			init_p->rx.sb_cq_index = setup_q->rxq.sb_index;
+			bnx2x_vf_mbx_set_q_flags(setup_q->rxq.flags,
+						 &init_p->rx.flags);
+
+			/* rx setup - flags */
+			bnx2x_vf_mbx_set_q_flags(setup_q->rxq.flags,
+						 &setup_p->flags);
+
+			/* rx setup - general */
+			setup_p->gen_params.mtu = setup_q->rxq.mtu;
+
+			/* rx setup - rx */
+			rxq_params->drop_flags = setup_q->rxq.drop_flags;
+			rxq_params->dscr_map = setup_q->rxq.rxq_addr;
+			rxq_params->sge_map = setup_q->rxq.sge_addr;
+			rxq_params->rcq_map = setup_q->rxq.rcq_addr;
+			rxq_params->rcq_np_map = setup_q->rxq.rcq_np_addr;
+			rxq_params->buf_sz = setup_q->rxq.buf_sz;
+			rxq_params->tpa_agg_sz = setup_q->rxq.tpa_agg_sz;
+			rxq_params->max_sges_pkt = setup_q->rxq.max_sge_pkt;
+			rxq_params->sge_buf_sz = setup_q->rxq.sge_buf_sz;
+			rxq_params->cache_line_log =
+				setup_q->rxq.cache_line_log;
+			rxq_params->sb_cq_index = setup_q->rxq.sb_index;
+
+			bnx2x_vfop_qctor_dump_rx(bp, vf, init_p, setup_p,
+						 q->index, q->sb_idx);
+		}
+		/* complete the preparations */
+		bnx2x_vfop_qctor_prep(bp, vf, q, &vf->op_params.qctor, q_type);
+
+		vf->op_rc = bnx2x_vfop_qsetup_cmd(bp, vf, &cmd, q->index);
+		if (vf->op_rc)
+			goto response;
+		return;
+	}
+response:
+	bnx2x_vf_mbx_resp(bp, vf);
+}
 /* dispatch request */
 static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				  struct bnx2x_vf_mbx *mbx)
@@ -399,6 +541,9 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		case CHANNEL_TLV_INIT:
 			bnx2x_vf_mbx_init_vf(bp, vf, mbx);
 			break;
+		case CHANNEL_TLV_SETUP_Q:
+			bnx2x_vf_mbx_setup_q(bp, vf, mbx);
+			break;
 		}
 
 	/* unknown TLV - this may belong to a VF driver from the future - a
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 14/22] bnx2x: Support statistics collection for VFs by the PF
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

Statistics are collected by the PF driver. The collection is
performed via a query sent to the device which is basically an array
of 3-tuples of the form (statistics client, function, DMAE address).
In this patch the PF driver adds to the query, on top of the
statistics clients it is maintaining for itself (rss queues, storage,
etc), the 3-tuples for the VFs it is maintaining. The addresses used
are the GPAs of the statistics buffers supplied by the VF in the
init message on the VF <-> PF channel. The function parameter
ensures that the iommu will translate the GPA to the correct physical
address.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |   78 +--------------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c    |   21 ++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h    |   11 ++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |   94 ++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c |  108 +++++++++++++++++----
 6 files changed, 222 insertions(+), 92 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 7c2fc15..1f82d8b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -5233,7 +5233,9 @@ static void bnx2x_timer(unsigned long data)
 	if (!netif_running(bp->dev))
 		return;
 
-	if (!BP_NOMCP(bp)) {
+
+	if (IS_PF(bp) &&
+	    !BP_NOMCP(bp)) {
 		int mb_idx = BP_FW_MB_IDX(bp);
 		u32 drv_pulse;
 		u32 mcp_pulse;
@@ -7654,66 +7656,6 @@ void bnx2x_free_mem(struct bnx2x *bp)
 		       BCM_PAGE_SIZE * NUM_EQ_PAGES);
 }
 
-static int bnx2x_alloc_fw_stats_mem(struct bnx2x *bp)
-{
-	int num_groups;
-	int is_fcoe_stats = NO_FCOE(bp) ? 0 : 1;
-
-	/* number of queues for statistics is number of eth queues + FCoE */
-	u8 num_queue_stats = BNX2X_NUM_ETH_QUEUES(bp) + is_fcoe_stats;
-
-	/* Total number of FW statistics requests =
-	 * 1 for port stats + 1 for PF stats + potential 1 for FCoE stats +
-	 * num of queues
-	 */
-	bp->fw_stats_num = 2 + is_fcoe_stats + num_queue_stats;
-
-
-	/* Request is built from stats_query_header and an array of
-	 * stats_query_cmd_group each of which contains
-	 * STATS_QUERY_CMD_COUNT rules. The real number or requests is
-	 * configured in the stats_query_header.
-	 */
-	num_groups = ((bp->fw_stats_num) / STATS_QUERY_CMD_COUNT) +
-		     (((bp->fw_stats_num) % STATS_QUERY_CMD_COUNT) ? 1 : 0);
-
-	bp->fw_stats_req_sz = sizeof(struct stats_query_header) +
-			num_groups * sizeof(struct stats_query_cmd_group);
-
-	/* Data for statistics requests + stats_conter
-	 *
-	 * stats_counter holds per-STORM counters that are incremented
-	 * when STORM has finished with the current request.
-	 *
-	 * memory for FCoE offloaded statistics are counted anyway,
-	 * even if they will not be sent.
-	 */
-	bp->fw_stats_data_sz = sizeof(struct per_port_stats) +
-		sizeof(struct per_pf_stats) +
-		sizeof(struct fcoe_statistics_params) +
-		sizeof(struct per_queue_stats) * num_queue_stats +
-		sizeof(struct stats_counter);
-
-	BNX2X_PCI_ALLOC(bp->fw_stats, &bp->fw_stats_mapping,
-			bp->fw_stats_data_sz + bp->fw_stats_req_sz);
-
-	/* Set shortcuts */
-	bp->fw_stats_req = (struct bnx2x_fw_stats_req *)bp->fw_stats;
-	bp->fw_stats_req_mapping = bp->fw_stats_mapping;
-
-	bp->fw_stats_data = (struct bnx2x_fw_stats_data *)
-		((u8 *)bp->fw_stats + bp->fw_stats_req_sz);
-
-	bp->fw_stats_data_mapping = bp->fw_stats_mapping +
-				   bp->fw_stats_req_sz;
-	return 0;
-
-alloc_mem_err:
-	BNX2X_PCI_FREE(bp->fw_stats, bp->fw_stats_mapping,
-		       bp->fw_stats_data_sz + bp->fw_stats_req_sz);
-	BNX2X_ERR("Can't allocate memory\n");
-	return -ENOMEM;
-}
 
 int bnx2x_alloc_mem_cnic(struct bnx2x *bp)
 {
@@ -7760,10 +7702,6 @@ int bnx2x_alloc_mem(struct bnx2x *bp)
 	BNX2X_PCI_ALLOC(bp->slowpath, &bp->slowpath_mapping,
 			sizeof(struct bnx2x_slowpath));
 
-	/* Allocated memory for FW statistics  */
-	if (bnx2x_alloc_fw_stats_mem(bp))
-		goto alloc_mem_err;
-
 	/* Allocate memory for CDU context:
 	 * This memory is allocated separately and not in the generic ILT
 	 * functions because CDU differs in few aspects:
@@ -7792,6 +7730,9 @@ int bnx2x_alloc_mem(struct bnx2x *bp)
 	if (bnx2x_ilt_mem_op(bp, ILT_MEMOP_ALLOC))
 		goto alloc_mem_err;
 
+	if (bnx2x_iov_alloc_mem(bp))
+		goto alloc_mem_err;
+
 	/* Slow path ring */
 	BNX2X_PCI_ALLOC(bp->spq, &bp->spq_mapping, BCM_PAGE_SIZE);
 
@@ -7799,13 +7740,6 @@ int bnx2x_alloc_mem(struct bnx2x *bp)
 	BNX2X_PCI_ALLOC(bp->eq_ring, &bp->eq_mapping,
 			BCM_PAGE_SIZE * NUM_EQ_PAGES);
 
-
-	/* fastpath */
-	/* need to be done at the end, since it's self adjusting to amount
-	 * of memory available for RSS queues
-	 */
-	if (bnx2x_alloc_fp_mem(bp))
-		goto alloc_mem_err;
 	return 0;
 
 alloc_mem_err:
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index b8b4b74..1eb92b8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -5199,6 +5199,27 @@ void bnx2x_init_queue_obj(struct bnx2x *bp,
 	obj->set_pending = bnx2x_queue_set_pending;
 }
 
+/* return a queue object's logical state*/
+int bnx2x_get_q_logical_state(struct bnx2x *bp,
+			       struct bnx2x_queue_sp_obj *obj)
+{
+	switch (obj->state) {
+	case BNX2X_Q_STATE_ACTIVE:
+	case BNX2X_Q_STATE_MULTI_COS:
+		return BNX2X_Q_LOGICAL_STATE_ACTIVE;
+	case BNX2X_Q_STATE_RESET:
+	case BNX2X_Q_STATE_INITIALIZED:
+	case BNX2X_Q_STATE_MCOS_TERMINATED:
+	case BNX2X_Q_STATE_INACTIVE:
+	case BNX2X_Q_STATE_STOPPED:
+	case BNX2X_Q_STATE_TERMINATED:
+	case BNX2X_Q_STATE_FLRED:
+		return BNX2X_Q_LOGICAL_STATE_STOPPED;
+	default:
+		return -EINVAL;
+	}
+}
+
 /********************** Function state object *********************************/
 enum bnx2x_func_state bnx2x_func_get_state(struct bnx2x *bp,
 					   struct bnx2x_func_sp_obj *o)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
index adbd91b..157fc4f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
@@ -776,6 +776,13 @@ enum bnx2x_q_state {
 	BNX2X_Q_STATE_MAX,
 };
 
+/* Allowed Queue states */
+enum bnx2x_q_logical_state {
+	BNX2X_Q_LOGICAL_STATE_ACTIVE,
+	BNX2X_Q_LOGICAL_STATE_STOPPED,
+};
+
+
 /* Allowed commands */
 enum bnx2x_queue_cmd {
 	BNX2X_Q_CMD_INIT,
@@ -1261,6 +1268,10 @@ void bnx2x_init_queue_obj(struct bnx2x *bp,
 int bnx2x_queue_state_change(struct bnx2x *bp,
 			     struct bnx2x_queue_state_params *params);
 
+int bnx2x_get_q_logical_state(struct bnx2x *bp,
+			       struct bnx2x_queue_sp_obj *obj);
+
+
 /********************* VLAN-MAC ****************/
 void bnx2x_init_mac_obj(struct bnx2x *bp,
 			struct bnx2x_vlan_mac_obj *mac_obj,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 6141c73..f027332 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1059,6 +1059,82 @@ void bnx2x_iov_sp_event(struct bnx2x *bp, int vf_cid, bool queue_work)
 			queue_delayed_work(bnx2x_wq, &bp->sp_task, 0);
 	}
 }
+
+void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
+{
+	int i;
+	int first_queue_query_index, num_queues_req;
+	dma_addr_t cur_data_offset;
+	struct stats_query_entry *cur_query_entry;
+	u8 stats_count = 0;
+	bool is_fcoe = false;
+
+	if (!IS_SRIOV(bp))
+		return;
+
+	if (!NO_FCOE(bp))
+		is_fcoe = true;
+
+	/* fcoe adds one global request and one queue request */
+	num_queues_req = BNX2X_NUM_ETH_QUEUES(bp) + is_fcoe;
+
+	first_queue_query_index = BNX2X_FIRST_QUEUE_QUERY_IDX -
+		(is_fcoe ? 0 : 1);
+
+	DP(BNX2X_MSG_IOV,
+	   "BNX2X_NUM_ETH_QUEUES %d, is_fcoe %d, first_queue_query_index %d => determined the last non virtual statistics query index is %d. Will add queries on top of that\n",
+	   BNX2X_NUM_ETH_QUEUES(bp), is_fcoe, first_queue_query_index,
+	   first_queue_query_index + num_queues_req);
+
+	cur_data_offset = bp->fw_stats_data_mapping +
+		offsetof(struct bnx2x_fw_stats_data, queue_stats) +
+		num_queues_req * sizeof(struct per_queue_stats);
+
+	cur_query_entry = &bp->fw_stats_req->
+		query[first_queue_query_index + num_queues_req];
+
+	for_each_vf(bp, i) {
+		int j;
+		struct bnx2x_virtf *vf = BP_VF(bp, i);
+		if (vf->state != VF_ENABLED) {
+			DP(BNX2X_MSG_IOV,
+			   "vf %d not enabled so no stats for it\n",
+			   vf->abs_vfid);
+			continue;
+		}
+
+		DP(BNX2X_MSG_IOV, "add addresses for vf %d\n", vf->abs_vfid);
+
+		for_each_vfq(vf, j) {
+			struct bnx2x_vf_queue *rxq = vfq_get(vf, j);
+
+			/* collect stats fro active queues only */
+			if (bnx2x_get_q_logical_state(bp, &rxq->sp_obj) ==
+			    BNX2X_Q_LOGICAL_STATE_STOPPED)
+				continue;
+
+			/* create stats query entry for this queue */
+			cur_query_entry->kind = STATS_TYPE_QUEUE;
+			cur_query_entry->index = vfq_cl_id(vf, rxq);
+			cur_query_entry->funcID =
+				cpu_to_le16(FW_VF_HANDLE(vf->abs_vfid));
+			cur_query_entry->address.hi =
+				cpu_to_le32(U64_HI(vf->fw_stat_map));
+			cur_query_entry->address.lo =
+				cpu_to_le32(U64_LO(vf->fw_stat_map));
+			DP(BNX2X_MSG_IOV,
+			   "added address %x %x for vf %d queue %d client %d\n",
+			   cur_query_entry->address.hi,
+			   cur_query_entry->address.lo, cur_query_entry->funcID,
+			   j, cur_query_entry->index);
+			cur_query_entry++;
+			cur_data_offset += sizeof(struct per_queue_stats);
+			stats_count++;
+		}
+	}
+	bp->fw_stats_req->hdr.cmd_num = bp->fw_stats_num + stats_count;
+}
+
 void bnx2x_iov_sp_task(struct bnx2x *bp)
 {
 	int i;
@@ -1079,6 +1155,24 @@ void bnx2x_iov_sp_task(struct bnx2x *bp)
 		}
 	}
 }
+
+static inline
+struct bnx2x_virtf *__vf_from_stat_id(struct bnx2x *bp, u8 stat_id)
+{
+	int i;
+	struct bnx2x_virtf *vf = NULL;
+
+	for_each_vf(bp, i) {
+		vf = BP_VF(bp, i);
+		if (stat_id >= vf->igu_base_id &&
+		    stat_id < vf->igu_base_id + vf_sb_count(vf))
+			break;
+	}
+	return vf;
+
+}
+
+/* VF API helpers */
 static void bnx2x_vf_qtbl_set_q(struct bnx2x *bp, u8 abs_vfid, u8 qid,
 				u8 enable)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index a3a8240..698ff71 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -425,6 +425,8 @@ void bnx2x_iov_set_queue_sp_obj(struct bnx2x *bp, int vf_cid,
 				struct bnx2x_queue_sp_obj **q_obj);
 void bnx2x_iov_sp_event(struct bnx2x *bp, int vf_cid, bool queue_work);
 int bnx2x_iov_eq_sp_event(struct bnx2x *bp, union event_ring_elem *elem);
+void bnx2x_iov_adjust_stats_req(struct bnx2x *bp);
+void bnx2x_iov_storm_stats_update(struct bnx2x *bp);
 void bnx2x_iov_sp_task(struct bnx2x *bp);
 /* global vf mailbox routines */
 void bnx2x_vf_mbx(struct bnx2x *bp, struct vf_pf_event_data *vfpf_event);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
index 348ed02..73cd0eb 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
@@ -19,7 +19,7 @@
 
 #include "bnx2x_stats.h"
 #include "bnx2x_cmn.h"
-
+#include "bnx2x_sriov.h"
 
 /* Statistics */
 
@@ -79,6 +79,41 @@ static inline u16 bnx2x_get_port_stats_dma_len(struct bnx2x *bp)
  * Init service functions
  */
 
+static void bnx2x_dp_stats(struct bnx2x *bp)
+{
+	int i;
+	DP(BNX2X_MSG_STATS, "dumping stats:\n"
+	   "fw_stats_req\n"
+	   "    hdr\n"
+	   "        cmd_num %d\n"
+	   "        reserved0 %d\n"
+	   "        drv_stats_counter %d\n"
+	   "        reserved1 %d\n"
+	   "        stats_counters_addrs %x %x\n",
+	   bp->fw_stats_req->hdr.cmd_num,
+	   bp->fw_stats_req->hdr.reserved0,
+	   bp->fw_stats_req->hdr.drv_stats_counter,
+	   bp->fw_stats_req->hdr.reserved1,
+	   bp->fw_stats_req->hdr.stats_counters_addrs.hi,
+	   bp->fw_stats_req->hdr.stats_counters_addrs.lo);
+
+	for (i = 0; i < bp->fw_stats_req->hdr.cmd_num; i++) {
+		DP(BNX2X_MSG_STATS,
+		   "query[%d]\n"
+		   "              kind %d\n"
+		   "              index %d\n"
+		   "              funcID %d\n"
+		   "              reserved %d\n"
+		   "              address %x %x\n",
+		   i, bp->fw_stats_req->query[i].kind,
+		   bp->fw_stats_req->query[i].index,
+		   bp->fw_stats_req->query[i].funcID,
+		   bp->fw_stats_req->query[i].reserved,
+		   bp->fw_stats_req->query[i].address.hi,
+		   bp->fw_stats_req->query[i].address.lo);
+	}
+}
+
 /* Post the next statistics ramrod. Protect it with the spin in
  * order to ensure the strict order between statistics ramrods
  * (each ramrod has a sequence number passed in a
@@ -103,7 +138,10 @@ static void bnx2x_storm_stats_post(struct bnx2x *bp)
 		DP(BNX2X_MSG_STATS, "Sending statistics ramrod %d\n",
 			bp->fw_stats_req->hdr.drv_stats_counter);
 
+		/* adjust the ramrod to include VF queues statistics */
+		bnx2x_iov_adjust_stats_req(bp);
 
+		bnx2x_dp_stats(bp);
 
 		/* send FW stats ramrod */
 		rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_STAT_QUERY, 0,
@@ -482,6 +520,12 @@ static void bnx2x_func_stats_init(struct bnx2x *bp)
 
 static void bnx2x_stats_start(struct bnx2x *bp)
 {
+	/* vfs travel through here as part of the statistics FSM, but no action
+	 * is required
+	 */
+	if (IS_VF(bp))
+		return;
+
 	if (bp->port.pmf)
 		bnx2x_port_stats_init(bp);
 
@@ -501,6 +545,11 @@ static void bnx2x_stats_pmf_start(struct bnx2x *bp)
 
 static void bnx2x_stats_restart(struct bnx2x *bp)
 {
+	/* vfs travel through here as part of the statistics FSM, but no action
+	 * is required
+	 */
+	if (IS_VF(bp))
+		return;
 	bnx2x_stats_comp(bp);
 	bnx2x_stats_start(bp);
 }
@@ -832,19 +881,10 @@ static int bnx2x_hw_stats_update(struct bnx2x *bp)
 	return 0;
 }
 
-static int bnx2x_storm_stats_update(struct bnx2x *bp)
+static int bnx2x_storm_stats_validate_counters(struct bnx2x *bp)
 {
-	struct tstorm_per_port_stats *tport =
-				&bp->fw_stats_data->port.tstorm_port_statistics;
-	struct tstorm_per_pf_stats *tfunc =
-				&bp->fw_stats_data->pf.tstorm_pf_statistics;
-	struct host_func_stats *fstats = &bp->func_stats;
-	struct bnx2x_eth_stats *estats = &bp->eth_stats;
-	struct bnx2x_eth_stats_old *estats_old = &bp->eth_stats_old;
 	struct stats_counter *counters = &bp->fw_stats_data->storm_counters;
-	int i;
 	u16 cur_stats_counter;
-
 	/* Make sure we use the value of the counter
 	 * used for sending the last stats ramrod.
 	 */
@@ -880,6 +920,23 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp)
 		   le16_to_cpu(counters->tstats_counter), bp->stats_counter);
 		return -EAGAIN;
 	}
+	return 0;
+}
+
+static int bnx2x_storm_stats_update(struct bnx2x *bp)
+{
+	struct tstorm_per_port_stats *tport =
+				&bp->fw_stats_data->port.tstorm_port_statistics;
+	struct tstorm_per_pf_stats *tfunc =
+				&bp->fw_stats_data->pf.tstorm_pf_statistics;
+	struct host_func_stats *fstats = &bp->func_stats;
+	struct bnx2x_eth_stats *estats = &bp->eth_stats;
+	struct bnx2x_eth_stats_old *estats_old = &bp->eth_stats_old;
+	int i;
+
+	/* vfs stat counter is managed by pf */
+	if (IS_PF(bp) && bnx2x_storm_stats_validate_counters(bp))
+		return -EAGAIN;
 
 	estats->error_bytes_received_hi = 0;
 	estats->error_bytes_received_lo = 0;
@@ -1173,23 +1230,34 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 	if (bnx2x_edebug_stats_stopped(bp))
 		return;
 
-	if (*stats_comp != DMAE_COMP_VAL)
-		return;
+	if (IS_PF(bp)) {
+		if (*stats_comp != DMAE_COMP_VAL)
+			return;
 
-	if (bp->port.pmf)
-		bnx2x_hw_stats_update(bp);
+		if (bp->port.pmf)
+			bnx2x_hw_stats_update(bp);
 
-	if (bnx2x_storm_stats_update(bp)) {
-		if (bp->stats_pending++ == 3) {
-			BNX2X_ERR("storm stats were not updated for 3 times\n");
-			bnx2x_panic();
+		if (bnx2x_storm_stats_update(bp)) {
+			if (bp->stats_pending++ == 3) {
+				BNX2X_ERR("storm stats were not updated for 3 times\n");
+				bnx2x_panic();
+			}
+			return;
 		}
-		return;
+	} else {
+		/* vf doesn't collect HW statistics, and doesn't get completions
+		 * perform only update
+		 */
+		bnx2x_storm_stats_update(bp);
 	}
 
 	bnx2x_net_stats_update(bp);
 	bnx2x_drv_stats_update(bp);
 
+	/* vf is done */
+	if (IS_VF(bp))
+		return;
+
 	if (netif_msg_timer(bp)) {
 		struct bnx2x_eth_stats *estats = &bp->eth_stats;
 
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 18/22] bnx2x: Support of PF driver of a VF close request
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

The 'close' command is the opposite of an init request. Here the
queues of the VF are closed (if any are opened) and released.
This flow applies the 'q_teardown' flow on all the queues.
The VF state is changed by this request.
Interrupts are disabled for the VF when closed.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |   96 +++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |    4 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |   27 ++++++
 3 files changed, 127 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 146ad65..888e3ec 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -137,6 +137,11 @@ enum bnx2x_vfop_mcast_state {
 	   BNX2X_VFOP_MCAST_CHK_DONE
 };
 
+enum bnx2x_vfop_close_state {
+	   BNX2X_VFOP_CLOSE_QUEUES,
+	   BNX2X_VFOP_CLOSE_HW
+};
+
 enum bnx2x_vfop_rxmode_state {
 	   BNX2X_VFOP_RXMODE_CONFIG,
 	   BNX2X_VFOP_RXMODE_DONE
@@ -2298,6 +2303,28 @@ static void bnx2x_vf_qtbl_set_q(struct bnx2x *bp, u8 abs_vfid, u8 qid,
 	REG_WR(bp, reg, val);
 }
 
+static void bnx2x_vf_clr_qtbl(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	int i;
+
+	for_each_vfq(vf, i)
+		bnx2x_vf_qtbl_set_q(bp, vf->abs_vfid,
+				    vfq_qzone_id(vf, vfq_get(vf, i)), false);
+}
+
+static void bnx2x_vf_igu_disable(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	u32 val;
+
+	/* clear the VF configuration - pretend */
+	bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, vf->abs_vfid));
+	val = REG_RD(bp, IGU_REG_VF_CONFIGURATION);
+	val &= ~(IGU_VF_CONF_MSI_MSIX_EN | IGU_VF_CONF_SINGLE_ISR_EN |
+		 IGU_VF_CONF_FUNC_EN | IGU_VF_CONF_PARENT_MASK);
+	REG_WR(bp, IGU_REG_VF_CONFIGURATION, val);
+	bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
+}
+
 u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf)
 {
 	return min_t(u8, min_t(u8, vf_sb_count(vf), BNX2X_CIDS_PER_VF),
@@ -2469,6 +2496,75 @@ int bnx2x_vf_init(struct bnx2x *bp, struct bnx2x_virtf *vf, dma_addr_t *sb_map)
 	return 0;
 }
 
+/* VFOP close (teardown the queues, delete mcasts and close HW) */
+static void bnx2x_vfop_close(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_cur(bp, vf);
+	struct bnx2x_vfop_args_qx *qx = &vfop->args.qx;
+	enum bnx2x_vfop_close_state state = vfop->state;
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vfop_close,
+		.block = false,
+	};
+
+	if (vfop->rc < 0)
+		goto op_err;
+
+	DP(BNX2X_MSG_IOV, "vf[%d] STATE: %d\n", vf->abs_vfid, state);
+
+	switch (state) {
+	case BNX2X_VFOP_CLOSE_QUEUES:
+
+		if (++(qx->qid) < vf_rxq_count(vf)) {
+			vfop->rc = bnx2x_vfop_qdown_cmd(bp, vf, &cmd, qx->qid);
+			if (vfop->rc)
+				goto op_err;
+			return;
+		}
+
+		/* remove multicasts */
+		vfop->state = BNX2X_VFOP_CLOSE_HW;
+		vfop->rc = bnx2x_vfop_mcast_cmd(bp, vf, &cmd, NULL, 0, false);
+		if (vfop->rc)
+			goto op_err;
+		return;
+
+	case BNX2X_VFOP_CLOSE_HW:
+
+		/* disable the interrupts */
+		DP(BNX2X_MSG_IOV, "disabling igu\n");
+		bnx2x_vf_igu_disable(bp, vf);
+
+		/* disable the VF */
+		DP(BNX2X_MSG_IOV, "clearing qtbl\n");
+		bnx2x_vf_clr_qtbl(bp, vf);
+
+		goto op_done;
+	default:
+		bnx2x_vfop_default(state);
+	}
+op_err:
+	BNX2X_ERR("VF[%d] CLOSE error: rc %d\n", vf->abs_vfid, vfop->rc);
+op_done:
+	vf->state = VF_ACQUIRED;
+	DP(BNX2X_MSG_IOV, "set state to acquired\n");
+	bnx2x_vfop_end(bp, vf, vfop);
+}
+
+int bnx2x_vfop_close_cmd(struct bnx2x *bp,
+			 struct bnx2x_virtf *vf,
+			 struct bnx2x_vfop_cmd *cmd)
+{
+	struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf);
+	if (vfop) {
+		vfop->args.qx.qid = -1; /* loop */
+		bnx2x_vfop_opset(BNX2X_VFOP_CLOSE_QUEUES,
+				 bnx2x_vfop_close, cmd->done);
+		return bnx2x_vfop_transition(bp, vf, bnx2x_vfop_close,
+					     cmd->block);
+	}
+	return -ENOMEM;
+}
 void bnx2x_lock_vf_pf_channel(struct bnx2x *bp, struct bnx2x_virtf *vf,
 			      enum channel_tlvs tlv)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 4a79741..9fadabc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -661,6 +661,10 @@ int bnx2x_vfop_rxmode_cmd(struct bnx2x *bp,
 			  struct bnx2x_vfop_cmd *cmd,
 			  int qid, unsigned long accept_flags);
 
+int bnx2x_vfop_close_cmd(struct bnx2x *bp,
+			 struct bnx2x_virtf *vf,
+			 struct bnx2x_vfop_cmd *cmd);
+
 int bnx2x_vf_idx_by_abs_fid(struct bnx2x *bp, u16 abs_vfid);
 u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf);
 /* VF FLR helpers */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 832aa31..96a1de3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -811,6 +811,30 @@ static void bnx2x_vf_mbx_teardown_q(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		bnx2x_vf_mbx_resp(bp, vf);
 }
 
+/* Done handler for 'close' operation: send response and reopen the
+ * channel.
+ */
+static void bnx2x_vf_mbx_close_done(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	bnx2x_vf_mbx_resp(bp, vf);
+	bnx2x_vf_enable_mbx(bp, vf->abs_vfid);
+}
+
+static void bnx2x_vf_mbx_close_vf(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				  struct bnx2x_vf_mbx *mbx)
+{
+	struct bnx2x_vfop_cmd cmd = {
+		.done = bnx2x_vf_mbx_close_done,
+		.block = false,
+	};
+
+	DP(BNX2X_MSG_IOV, "VF[%d] VF_CLOSE\n", vf->abs_vfid);
+
+	vf->op_rc = bnx2x_vfop_close_cmd(bp, vf, &cmd);
+	if (vf->op_rc)
+		bnx2x_vf_mbx_close_done(bp, vf);
+}
+
 /* dispatch request */
 static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				  struct bnx2x_vf_mbx *mbx)
@@ -842,6 +866,9 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		case CHANNEL_TLV_TEARDOWN_Q:
 			bnx2x_vf_mbx_teardown_q(bp, vf, mbx);
 			break;
+		case CHANNEL_TLV_CLOSE:
+			bnx2x_vf_mbx_close_vf(bp, vf, mbx);
+			break;
 		}
 
 	/* unknown TLV - this may belong to a VF driver from the future - a
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 21/22] bnx2x: Support PF <-> VF Bulletin Board
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

The PF <-> VF Bulletin Board is a simple interface between the
PF and the VF. The main reason for the Bulletin Board is to allow
the PF to be the initiator. The VF publishes at 'acquire' stage
the GPA of a Bulletin Board structure it has allocated. The PF notes
this GPA in the VF database. The VF samples the Bulletin Board
periodically for new messages. The latest version of the BB is always
used.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |    6 ++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   |   89 +++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h   |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |   95 ++++++++++++++++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |   13 +++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |   17 ++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |   67 +++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h  |   38 ++++++++
 8 files changed, 326 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index dfae9b0..91beeaf 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1261,6 +1261,12 @@ struct bnx2x {
 	/* we set aside a copy of the acquire response */
 	struct pfvf_acquire_resp_tlv acquire_resp;
 
+	/* bulletin board for messages from pf to vf*/
+	union pf_vf_bulletin   *pf2vf_bulletin;
+	dma_addr_t		pf2vf_bulletin_mapping;
+
+	struct pf_vf_bulletin_content	old_bulletin;
+
 	struct net_device	*dev;
 	struct pci_dev		*pdev;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index df65695..c73ba72 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3791,6 +3791,95 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
 	return 0;
 }
 
+/* New mac for VF. Consider these cases:
+ * 1. VF hasn't been acquired yet - save the mac in local bulletin board and
+ *    supply at acquire.
+ * 2. VF has already been acquired but has not yet initialized - store in local
+ *    bulletin board. mac will be posted on VF bulletin board after VF init. VF
+ *    will configure this mac when it is ready.
+ * 3. VF has already initialized but has not yet setup a queue - post the new
+ *    mac on VF's bulletin board right now. VF will configure this mac when it
+ *    is ready.
+ * 4. VF has already set a queue - delete any macs already configured for this
+ *    queue and manually config the new mac.
+ * In any event, once this function has been called refuse any attempts by the
+ * VF to configure any mac for itself except for this mac. In case of a race
+ * where the VF fails to see the new post on its bulletin board before sending a
+ * mac configuration request, the PF will simply fail the request and VF can try
+ * again after consulting its bulletin board
+ */
+int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac)
+{
+
+	struct bnx2x *bp = netdev_priv(dev);
+	int rc, q_logical_state, vfidx = queue;
+	struct bnx2x_virtf *vf = BP_VF(bp, vfidx);
+	struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx);
+
+	/* if SRIOV is disabled there is nothing to do (and somewhere, someone
+	 * has erred).
+	 */
+	if (!IS_SRIOV(bp)) {
+		BNX2X_ERR("bnx2x_set_vf_mac called though sriov is disabled\n");
+		return -EINVAL;
+	}
+
+	if (!is_valid_ether_addr(mac)) {
+		BNX2X_ERR("mac address invalid\n");
+		return -EINVAL;
+	}
+
+	/* update PF's copy of the VF's bulletin. will no longer accept mac
+	 * configuration requests from vf unless match this mac
+	 */
+	bulletin->valid_bitmap |= 1 << MAC_ADDR_VALID;
+	memcpy(bulletin->mac, mac, ETH_ALEN);
+
+	/* Post update on VF's bulletin board */
+	rc = bnx2x_post_vf_bulletin(bp, vfidx);
+	if (rc) {
+		BNX2X_ERR("failed to update VF[%d] bulletin", vfidx);
+		return rc;
+	}
+
+	/* is vf initialized and queue set up? */
+	q_logical_state =
+		bnx2x_get_q_logical_state(bp, &bnx2x_vfq(vf, 0, sp_obj));
+	if (vf->state == VF_ENABLED &&
+	    q_logical_state == BNX2X_Q_LOGICAL_STATE_ACTIVE) {
+
+		/* configure the mac in device on this vf's queue */
+		unsigned long flags = 0;
+		struct bnx2x_vlan_mac_obj *mac_obj = &bnx2x_vfq(vf, 0, mac_obj);
+
+		/* must lock vfpf channel to protect against vf flows */
+		bnx2x_lock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_MAC);
+
+		/* remove existing eth macs */
+		rc = bnx2x_del_all_macs(bp, mac_obj, BNX2X_ETH_MAC, true);
+		if (rc) {
+			BNX2X_ERR("failed to delete eth macs\n");
+			return -EINVAL;
+		}
+
+		/* remove existing uc list macs */
+		rc = bnx2x_del_all_macs(bp, mac_obj, BNX2X_UC_LIST_MAC, true);
+		if (rc) {
+			BNX2X_ERR("failed to delete uc_list macs\n");
+			return -EINVAL;
+		}
+
+		/* configure the new mac to device */
+		__set_bit(RAMROD_COMP_WAIT, &flags);
+		bnx2x_set_mac_one(bp, (u8 *)&bulletin->mac, mac_obj, true,
+				  BNX2X_ETH_MAC, &flags);
+
+		bnx2x_unlock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_MAC);
+	}
+
+	return rc;
+}
+
 /* called with rtnl_lock */
 int bnx2x_change_mac_addr(struct net_device *dev, void *p)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 2dfde45..b80c456 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -496,6 +496,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev);
 /* setup_tc callback */
 int bnx2x_setup_tc(struct net_device *dev, u8 num_tc);
 
+int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac);
+
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 42e572e..5c077fc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -5230,6 +5230,62 @@ void bnx2x_drv_pulse(struct bnx2x *bp)
 		 bp->fw_drv_pulse_wr_seq);
 }
 
+/* crc is the first field in the bulletin board. compute the crc over the
+ * entire bulletin board excluding the crc field itself */
+u32 bnx2x_crc_vf_bulletin(struct bnx2x *bp,
+			  struct pf_vf_bulletin_content *bulletin)
+{
+	return crc32(BULLETIN_CRC_SEED,
+		 ((u8 *)bulletin) + sizeof(bulletin->crc),
+		 BULLETIN_CONTENT_SIZE - sizeof(bulletin->crc));
+}
+
+/* Check for new posts on the bulletin board */
+enum sample_bulletin_result bnx2x_sample_bulletin(struct bnx2x *bp)
+{
+	struct pf_vf_bulletin_content bulletin = bp->pf2vf_bulletin->content;
+	int attempts;
+
+	/* bulletin board hasn't changed since last sample */
+	if (bp->old_bulletin.version == bulletin.version)
+		return PFVF_BULLETIN_UNCHANGED;
+
+	/* validate crc of new bulletin board */
+	if (bp->old_bulletin.version != bp->pf2vf_bulletin->content.version) {
+
+		/* sampling structure in mid post may result with corrupted data
+		 * validate crc to ensure coherency.
+		 */
+		for (attempts = 0; attempts < BULLETIN_ATTEMPTS; attempts++) {
+			bulletin = bp->pf2vf_bulletin->content;
+			if (bulletin.crc == bnx2x_crc_vf_bulletin(bp,
+								  &bulletin))
+				break;
+
+			BNX2X_ERR("bad crc on bulletin board. contained %x computed %x\n",
+				  bulletin.crc,
+				  bnx2x_crc_vf_bulletin(bp, &bulletin));
+		}
+		if (attempts >= BULLETIN_ATTEMPTS) {
+			BNX2X_ERR("pf to vf bulletin board crc was wrong %d consecutive times. Aborting\n",
+				  attempts);
+			return PFVF_BULLETIN_CRC_ERR;
+		}
+	}
+
+	/* the mac address in bulletin board is valid and is new */
+	if (bulletin.valid_bitmap & 1 << MAC_ADDR_VALID &&
+	    memcmp(bulletin.mac, bp->old_bulletin.mac, ETH_ALEN)) {
+
+		/* update new mac to net device */
+		memcpy(bp->dev->dev_addr, bulletin.mac, ETH_ALEN);
+	}
+
+	/* copy new bulletin board to bp */
+	bp->old_bulletin = bulletin;
+
+	return PFVF_BULLETIN_UPDATED;
+}
 
 static void bnx2x_timer(unsigned long data)
 {
@@ -5267,6 +5323,10 @@ static void bnx2x_timer(unsigned long data)
 	if (bp->state == BNX2X_STATE_OPEN)
 		bnx2x_stats_handle(bp, STATS_EVENT_UPDATE);
 
+	/* sample pf vf bulletin board for new posts from pf */
+	if (IS_VF(bp))
+		bnx2x_sample_bulletin(bp);
+
 	mod_timer(&bp->timer, jiffies + bp->current_interval);
 }
 
@@ -11581,7 +11641,7 @@ static const struct net_device_ops bnx2x_netdev_ops = {
 	.ndo_poll_controller	= poll_bnx2x,
 #endif
 	.ndo_setup_tc		= bnx2x_setup_tc,
-
+	.ndo_set_vf_mac		= bnx2x_set_vf_mac,
 #ifdef NETDEV_FCOE_WWNN
 	.ndo_fcoe_get_wwn	= bnx2x_fcoe_get_wwn,
 #endif
@@ -12243,6 +12303,11 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 		/* allocate vf2pf mailbox for vf to pf channel */
 		BNX2X_PCI_ALLOC(bp->vf2pf_mbox, &bp->vf2pf_mbox_mapping,
 				sizeof(struct bnx2x_vf_mbx_msg));
+
+		/* allocate pf 2 vf bulletin board */
+		BNX2X_PCI_ALLOC(bp->pf2vf_bulletin, &bp->pf2vf_bulletin_mapping,
+				sizeof(union pf_vf_bulletin));
+
 	} else {
 		doorbell_size = BNX2X_L2_MAX_CID(bp) * (1 << BNX2X_DB_SHIFT);
 		if (doorbell_size > pci_resource_len(pdev, 2)) {
@@ -13308,6 +13373,9 @@ int __devinit bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count,
 	req->resc_request.num_mac_filters = VF_ACQUIRE_MAC_FILTERS;
 	req->resc_request.num_mc_filters = VF_ACQUIRE_MC_FILTERS;
 
+	/* pf 2 vf bulletin board address */
+	req->bulletin_addr = bp->pf2vf_bulletin_mapping;
+
 	/* add list termination tlv */
 	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
 		      sizeof(struct channel_list_end_tlv));
@@ -13664,6 +13732,9 @@ int bnx2x_vfpf_set_mac(struct bnx2x *bp)
 	req->filters[0].flags =
 		VFPF_Q_FILTER_DEST_MAC_VALID | VFPF_Q_FILTER_SET_MAC;
 
+	/* sample bulletin board for new mac */
+	bnx2x_sample_bulletin(bp);
+
 	/* copy mac from device to request */
 	memcpy(req->filters[0].mac, bp->dev->dev_addr, ETH_ALEN);
 
@@ -13681,6 +13752,28 @@ int bnx2x_vfpf_set_mac(struct bnx2x *bp)
 		return rc;
 	}
 
+	/* failure may mean PF was configured with a new mac for us */
+	while (resp->hdr.status == PFVF_STATUS_FAILURE) {
+
+		DP(BNX2X_MSG_IOV,
+		   "vfpf SET MAC failed. Check bulletin board for new posts\n");
+
+		/* check if bulletin board was updated */
+		if (bnx2x_sample_bulletin(bp) == PFVF_BULLETIN_UPDATED) {
+
+			/* copy mac from device to request */
+			memcpy(req->filters[0].mac, bp->dev->dev_addr,
+			       ETH_ALEN);
+
+			/* send message to pf */
+			rc = bnx2x_send_msg2pf(bp, &resp->hdr.status,
+					       bp->vf2pf_mbox_mapping);
+		} else {
+			/* no new info in bulletin */
+			break;
+		}
+	}
+
 	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
 		BNX2X_ERR("vfpf SET MAC failed: %d\n", resp->hdr.status);
 		return -EINVAL;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 104978a..41a56eb 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -2053,6 +2053,10 @@ void bnx2x_iov_free_mem(struct bnx2x *bp)
 	BNX2X_PCI_FREE(BP_VF_MBX_DMA(bp)->addr,
 		       BP_VF_MBX_DMA(bp)->mapping,
 		       BP_VF_MBX_DMA(bp)->size);
+
+	BNX2X_PCI_FREE(BP_VF_BULLETIN_DMA(bp)->addr,
+		       BP_VF_BULLETIN_DMA(bp)->mapping,
+		       BP_VF_BULLETIN_DMA(bp)->size);
 }
 
 
@@ -2093,6 +2097,12 @@ int bnx2x_iov_alloc_mem(struct bnx2x *bp)
 			tot_size);
 	BP_VF_MBX_DMA(bp)->size = tot_size;
 
+	/* allocate local bulletin boards */
+	tot_size = BNX2X_NR_VIRTFN(bp) * BULLETIN_CONTENT_SIZE;
+	BNX2X_PCI_ALLOC(BP_VF_BULLETIN_DMA(bp)->addr,
+			&BP_VF_BULLETIN_DMA(bp)->mapping, tot_size);
+	BP_VF_BULLETIN_DMA(bp)->size = tot_size;
+
 	return 0;
 
 alloc_mem_err:
@@ -2810,6 +2820,9 @@ int bnx2x_vf_init(struct bnx2x *bp, struct bnx2x_virtf *vf, dma_addr_t *sb_map)
 
 	vf->state = VF_ENABLED;
 
+	/* update vf bulletin board */
+	bnx2x_post_vf_bulletin(bp, vf->index);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index f3c60ef..bd2edc3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -382,6 +382,11 @@ struct bnx2x_vfdb {
 	struct bnx2x_vf_mbx	mbxs[BNX2X_MAX_NUM_OF_VFS];
 #define BP_VF_MBX(bp, vfid)	(&((bp)->vfdb->mbxs[(vfid)]))
 
+	struct hw_dma		bulletin_dma;
+#define BP_VF_BULLETIN_DMA(bp)	(&((bp)->vfdb->bulletin_dma))
+#define	BP_VF_BULLETIN(bp, vf) \
+	(((struct pf_vf_bulletin_content *)(BP_VF_BULLETIN_DMA(bp)->addr)) + vf)
+
 	struct hw_dma		sp_dma;
 #define bnx2x_vf_sp(bp, vf, field) ((bp)->vfdb->sp_dma.addr +		\
 		(vf)->index * sizeof(struct bnx2x_vf_sp) +		\
@@ -708,4 +713,16 @@ void bnx2x_dp_tlv_list(struct bnx2x *bp, void *tlvs_list);
 
 bool bnx2x_tlv_supported(u16 tlvtype);
 
+u32 bnx2x_crc_vf_bulletin(struct bnx2x *bp,
+			  struct pf_vf_bulletin_content *bulletin);
+int bnx2x_post_vf_bulletin(struct bnx2x *bp, int vf);
+
+enum sample_bulletin_result {
+	   PFVF_BULLETIN_UNCHANGED,
+	   PFVF_BULLETIN_UPDATED,
+	   PFVF_BULLETIN_CRC_ERR
+};
+
+enum sample_bulletin_result bnx2x_sample_bulletin(struct bnx2x *bp);
+
 #endif /* bnx2x_sriov.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 0955ba4..8a01cc3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -303,6 +303,10 @@ static void bnx2x_vf_mbx_acquire_resp(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		resc->num_mc_filters = 0;
 
 		if (status == PFVF_STATUS_SUCCESS) {
+			/* fill in the allocated resources */
+			struct pf_vf_bulletin_content *bulletin =
+				BP_VF_BULLETIN(bp, vf->index);
+
 			for_each_vfq(vf, i)
 				resc->hw_qid[i] =
 					vfq_qzone_id(vf, vfq_get(vf, i));
@@ -311,6 +315,12 @@ static void bnx2x_vf_mbx_acquire_resp(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				resc->hw_sbs[i].hw_sb_id = vf_igu_sb(vf, i);
 				resc->hw_sbs[i].sb_qid = vf_hc_qzone(vf, i);
 			}
+
+			/* if a mac has been set for this vf, supply it */
+			if (bulletin->valid_bitmap & 1 << MAC_ADDR_VALID) {
+				memcpy(resc->current_mac_addr, bulletin->mac,
+				       ETH_ALEN);
+			}
 		}
 	}
 
@@ -362,6 +372,9 @@ static void bnx2x_vf_mbx_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	/* acquire the resources */
 	rc = bnx2x_vf_acquire(bp, vf, &acquire->resc_request);
 
+	/* store address of vf's bulletin board */
+	vf->bulletin_map = acquire->bulletin_addr;
+
 	/* response */
 	bnx2x_vf_mbx_acquire_resp(bp, vf, mbx, rc);
 }
@@ -773,11 +786,39 @@ static void bnx2x_vf_mbx_set_q_filters(struct bnx2x *bp,
 				       struct bnx2x_vf_mbx *mbx)
 {
 	struct vfpf_set_q_filters_tlv *filters = &mbx->msg->req.set_q_filters;
+	struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vf->index);
 	struct bnx2x_vfop_cmd cmd = {
 		.done = bnx2x_vf_mbx_resp,
 		.block = false,
 	};
 
+	/* if a mac was already set for this VF via the set vf mac ndo, we only
+	 * accept mac configurations of that mac. Why accept them at all?
+	 * because PF may have been unable to configure the mac at the time
+	 * since queue was not set up.
+	 */
+	if (bulletin->valid_bitmap & 1 << MAC_ADDR_VALID) {
+
+		/* once a mac was set by ndo can only accept a single mac... */
+		if (filters->n_mac_vlan_filters > 1) {
+			BNX2X_ERR("VF[%d] requested the addition of multiple macs after set_vf_mac ndo was called\n",
+				  vf->abs_vfid);
+			vf->op_rc = -EPERM;
+			goto response;
+		}
+
+		/* ...and only the mac set by the ndo */
+		if (filters->n_mac_vlan_filters == 1 &&
+		    memcmp(filters->filters->mac, bulletin->mac, ETH_ALEN)) {
+
+			BNX2X_ERR("VF[%d] requested the addition of a mac address not matching the one configured by set_vf_mac ndo\n",
+				  vf->abs_vfid);
+
+
+			vf->op_rc = -EPERM;
+			goto response;
+		}
+	}
 
 	/* verify vf_qid */
 	if (filters->vf_qid > vf_rxq_count(vf))
@@ -991,3 +1032,29 @@ mbx_error:
 mbx_done:
 	return;
 }
+
+/* propagate local bulletin board to vf */
+int bnx2x_post_vf_bulletin(struct bnx2x *bp, int vf)
+{
+	struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vf);
+	dma_addr_t pf_addr = BP_VF_BULLETIN_DMA(bp)->mapping +
+		vf * BULLETIN_CONTENT_SIZE;
+	dma_addr_t vf_addr = bnx2x_vf(bp, vf, bulletin_map);
+	u32 len = BULLETIN_CONTENT_SIZE;
+	int rc;
+
+	/* can only update vf after init took place */
+	if (bnx2x_vf(bp, vf, state) != VF_ENABLED &&
+	    bnx2x_vf(bp, vf, state) != VF_ACQUIRED)
+		return 0;
+
+	/* increment bulletin board version and compute crc */
+	bulletin->version++;
+	bulletin->crc = bnx2x_crc_vf_bulletin(bp, bulletin);
+
+	/* propagate bulletin board via dmae to vm memory */
+	rc = bnx2x_copy32_vf_dmae(bp, false, pf_addr,
+				  bnx2x_vf(bp, vf, abs_vfid), U64_HI(vf_addr),
+				  U64_LO(vf_addr), len/4);
+	return rc;
+}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
index 299bbcc..98f28aa 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
@@ -36,6 +36,7 @@ struct hw_sb_info {
  * A.K.A VF-PF mailbox
  */
 #define TLV_BUFFER_SIZE			1024
+#define PF_VF_BULLETIN_SIZE		512
 
 #define VFPF_QUEUE_FLG_TPA		0x0001
 #define VFPF_QUEUE_FLG_TPA_IPV6		0x0002
@@ -59,6 +60,9 @@ struct hw_sb_info {
 #define VFPF_RX_MASK_ACCEPT_ALL_UNICAST		0x00000004
 #define VFPF_RX_MASK_ACCEPT_ALL_MULTICAST	0x00000008
 #define VFPF_RX_MASK_ACCEPT_BROADCAST		0x00000010
+#define BULLETIN_CONTENT_SIZE		(sizeof(struct pf_vf_bulletin_content))
+#define BULLETIN_ATTEMPTS	5 /* crc failures before throwing towel */
+#define BULLETIN_CRC_SEED	0
 
 enum {
 	PFVF_STATUS_WAITING = 0,
@@ -295,6 +299,39 @@ union pfvf_tlvs {
 	struct channel_list_end_tlv	list_end;
 	struct tlv_buffer_size		tlv_buf_size;
 };
+
+/* This is a structure which is allocated in the VF, which the PF may update
+ * when it deems it necessary to do so. The bulletin board is sampled
+ * periodically by the VF. A copy per VF is maintained in the PF (to prevent
+ * loss of data upon multiple updates (or the need for read modify write)).
+ */
+struct pf_vf_bulletin_size {
+	u8 size[PF_VF_BULLETIN_SIZE];
+};
+
+struct pf_vf_bulletin_content {
+	u32 crc;			/* crc of structure to ensure is not in
+					 * mid-update
+					 */
+	u32 version;
+
+	aligned_u64 valid_bitmap;	/* bitmap indicating which fields
+					 * hold valid values
+					 */
+
+#define MAC_ADDR_VALID		0	/* alert the vf that a new mac address
+					 * is available for it
+					 */
+
+	u8 mac[ETH_ALEN];
+	u8 padding[2];
+};
+
+union pf_vf_bulletin {
+	struct pf_vf_bulletin_content content;
+	struct pf_vf_bulletin_size size;
+};
+
 enum channel_tlvs {
 	   CHANNEL_TLV_NONE, /* ends tlv sequence */
 	   CHANNEL_TLV_ACQUIRE,
@@ -307,6 +344,7 @@ enum channel_tlvs {
 	   CHANNEL_TLV_PF_RELEASE_VF,
 	   CHANNEL_TLV_LIST_END,
 	   CHANNEL_TLV_FLR,
+	   CHANNEL_TLV_PF_SET_MAC,
 	   CHANNEL_TLV_MAX
 };
 
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 07/22] bnx2x: Support ndo_set_rxmode in VF driver
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

The VF driver uses the 'q_filter' request in the VF <-> PF channel to
have the PF configure the requested rxmode to device. ndo_set_rxmode
is called under bottom half lock, so sleeping until the response
arrives over the VF <-> PF channel is out of the question. For this reason
the VF driver returns from the ndo after scheduling a work item, which
in turn processes the rx mode request and adds the classification
information through the VF <-> PF channel accordingly.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      |    5 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |  175 +++++++++++++++++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h |    7 +
 3 files changed, 181 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index cd8c645..d1806e3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1194,6 +1194,8 @@ enum {
 	BNX2X_SP_RTNL_TX_TIMEOUT,
 	BNX2X_SP_RTNL_AFEX_F_UPDATE,
 	BNX2X_SP_RTNL_FAN_FAILURE,
+	BNX2X_SP_RTNL_VFPF_MCAST,
+	BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
 };
 
 
@@ -2230,6 +2232,9 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp);
 int bnx2x_vfpf_setup_q(struct bnx2x *bp, int fp_idx);
 int bnx2x_vfpf_teardown_queue(struct bnx2x *bp, int qidx);
 int bnx2x_vfpf_set_mac(struct bnx2x *bp);
+int bnx2x_vfpf_set_mcast(struct net_device *dev);
+int bnx2x_vfpf_storm_rx_mode(struct bnx2x *bp);
+
 int bnx2x_nic_load_analyze_req(struct bnx2x *bp, u32 load_code);
 /* Congestion management fairness mode */
 #define CMNG_FNS_NONE		0
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 66ff9bc..16f73d3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9380,6 +9380,19 @@ sp_rtnl_not_reset:
 		bnx2x_close(bp->dev);
 	}
 
+	if (test_and_clear_bit(BNX2X_SP_RTNL_VFPF_MCAST, &bp->sp_rtnl_state)) {
+		DP(BNX2X_MSG_SP,
+		   "sending set mcast vf pf channel message from rtnl sp-task\n");
+		bnx2x_vfpf_set_mcast(bp->dev);
+	}
+
+	if (test_and_clear_bit(BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
+			       &bp->sp_rtnl_state)) {
+		DP(BNX2X_MSG_SP,
+		   "sending set storm rx mode vf pf channel message from rtnl sp-task\n");
+		bnx2x_vfpf_storm_rx_mode(bp);
+	}
+
 sp_rtnl_exit:
 	rtnl_unlock();
 }
@@ -11399,12 +11412,25 @@ void bnx2x_set_rx_mode(struct net_device *dev)
 		  CHIP_IS_E1(bp)))
 		rx_mode = BNX2X_RX_MODE_ALLMULTI;
 	else {
-		/* some multicasts */
-		if (bnx2x_set_mc_list(bp) < 0)
-			rx_mode = BNX2X_RX_MODE_ALLMULTI;
+		if (IS_PF(bp)) {
+			/* some multicasts */
+			if (bnx2x_set_mc_list(bp) < 0)
+				rx_mode = BNX2X_RX_MODE_ALLMULTI;
 
-		if (bnx2x_set_uc_list(bp) < 0)
-			rx_mode = BNX2X_RX_MODE_PROMISC;
+			if (bnx2x_set_uc_list(bp) < 0)
+				rx_mode = BNX2X_RX_MODE_PROMISC;
+		} else {
+			/* configuring mcast to a vf involves sleeping (when we
+			 * wait for the pf's response). Since this function is
+			 * called from non sleepable context we must schedule
+			 * a work item for this purpose
+			 */
+			smp_mb__before_clear_bit();
+			set_bit(BNX2X_SP_RTNL_VFPF_MCAST,
+				&bp->sp_rtnl_state);
+			smp_mb__after_clear_bit();
+			schedule_delayed_work(&bp->sp_rtnl_task, 0);
+		}
 	}
 
 	bp->rx_mode = rx_mode;
@@ -11418,7 +11444,20 @@ void bnx2x_set_rx_mode(struct net_device *dev)
 		return;
 	}
 
-	bnx2x_set_storm_rx_mode(bp);
+	if (IS_PF(bp)) {
+		bnx2x_set_storm_rx_mode(bp);
+	} else {
+		/* configuring rx mode to storms in a vf involves sleeping (when
+		 * we wait for the pf's response). Since this function is
+		 * called from non sleepable context we must schedule
+		 * a work item for this purpose
+		 */
+		smp_mb__before_clear_bit();
+		set_bit(BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
+			&bp->sp_rtnl_state);
+		smp_mb__after_clear_bit();
+		schedule_delayed_work(&bp->sp_rtnl_task, 0);
+	}
 }
 
 /* called with rtnl_lock */
@@ -13610,3 +13649,127 @@ int bnx2x_vfpf_set_mac(struct bnx2x *bp)
 
 	return 0;
 }
+
+int bnx2x_vfpf_set_mcast(struct net_device *dev)
+{
+	struct bnx2x *bp = netdev_priv(dev);
+	struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
+	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
+	int rc, i = 0;
+	struct netdev_hw_addr *ha;
+
+	if (bp->state != BNX2X_STATE_OPEN) {
+		DP(NETIF_MSG_IFUP, "state is %x, returning\n", bp->state);
+		return -EINVAL;
+	}
+
+	/* clear mailbox and prep first tlv */
+	bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_SET_Q_FILTERS,
+			sizeof(*req));
+
+	/* Get Rx mode requested */
+	DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags);
+
+	netdev_for_each_mc_addr(ha, dev) {
+		DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
+		   bnx2x_mc_addr(ha));
+		memcpy(req->multicast[i], bnx2x_mc_addr(ha),
+		       sizeof(req->multicast[i]));
+		i++;
+	}
+
+	/* We support four PFVF_MAX_MULTICAST_PER_VF mcast
+	 * addresses tops
+	 */
+	if (i >= PFVF_MAX_MULTICAST_PER_VF) {
+		DP(NETIF_MSG_IFUP,
+		   "VF supports not more than %d multicast MAC addresses\n",
+		   PFVF_MAX_MULTICAST_PER_VF);
+		return -EINVAL;
+	}
+
+	req->n_multicast = i;
+	req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED;
+
+	req->vf_qid = 0;
+
+	/* add list termination tlv */
+	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+
+	/* output tlvs list */
+	bnx2x_dp_tlv_list(bp, req);
+
+	rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping);
+	if (rc) {
+		BNX2X_ERR("Sending a message failed: %d\n", rc);
+		return rc;
+	}
+
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		BNX2X_ERR("Set Rx mode/multicast failed: %d\n",
+			  resp->hdr.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int bnx2x_vfpf_storm_rx_mode(struct bnx2x *bp)
+{
+	int mode = bp->rx_mode;
+	struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
+	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
+	int rc;
+
+	/* clear mailbox and prep first tlv */
+	bnx2x_vfpf_prep(bp, &req->first_tlv, CHANNEL_TLV_SET_Q_FILTERS,
+			sizeof(*req));
+
+	DP(NETIF_MSG_IFUP, "Rx mode is %d\n", mode);
+
+	switch (mode) {
+	case BNX2X_RX_MODE_NONE: /* no Rx */
+		req->rx_mask = VFPF_RX_MASK_ACCEPT_NONE;
+		break;
+	case BNX2X_RX_MODE_NORMAL:
+		req->rx_mask = VFPF_RX_MASK_ACCEPT_MATCHED_MULTICAST;
+		req->rx_mask |= VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST;
+		req->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST;
+		break;
+	case BNX2X_RX_MODE_ALLMULTI:
+		req->rx_mask = VFPF_RX_MASK_ACCEPT_ALL_MULTICAST;
+		req->rx_mask |= VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST;
+		req->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST;
+		break;
+	case BNX2X_RX_MODE_PROMISC:
+		req->rx_mask = VFPF_RX_MASK_ACCEPT_ALL_UNICAST;
+		req->rx_mask |= VFPF_RX_MASK_ACCEPT_ALL_MULTICAST;
+		req->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST;
+		break;
+	default:
+		BNX2X_ERR("BAD rx mode (%d)\n", mode);
+		return -EINVAL;
+	}
+
+	req->flags |= VFPF_SET_Q_FILTERS_RX_MASK_CHANGED;
+	req->vf_qid = 0;
+
+	/* add list termination tlv */
+	bnx2x_add_tlv(bp, req, req->first_tlv.tl.length, CHANNEL_TLV_LIST_END,
+		      sizeof(struct channel_list_end_tlv));
+
+	/* output tlvs list */
+	bnx2x_dp_tlv_list(bp, req);
+
+	rc = bnx2x_send_msg2pf(bp, &resp->hdr.status, bp->vf2pf_mbox_mapping);
+	if (rc)
+		BNX2X_ERR("Sending a message failed: %d\n", rc);
+
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		BNX2X_ERR("Set Rx mode failed: %d\n", resp->hdr.status);
+		return -EINVAL;
+	}
+
+	return rc;
+}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
index 80fdfad..95f5c2b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
@@ -53,6 +53,13 @@ struct hw_sb_info {
 #define VFPF_QUEUE_DROP_TTL0		(1 << 2)
 #define VFPF_QUEUE_DROP_UDP_CS_ERR	(1 << 3)
 
+#define VFPF_RX_MASK_ACCEPT_NONE		0x00000000
+#define VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST	0x00000001
+#define VFPF_RX_MASK_ACCEPT_MATCHED_MULTICAST	0x00000002
+#define VFPF_RX_MASK_ACCEPT_ALL_UNICAST		0x00000004
+#define VFPF_RX_MASK_ACCEPT_ALL_MULTICAST	0x00000008
+#define VFPF_RX_MASK_ACCEPT_BROADCAST		0x00000010
+
 enum {
 	PFVF_STATUS_WAITING = 0,
 	PFVF_STATUS_SUCCESS,
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 01/22] bnx2x: Support probing and removing of VF device
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

To support probing and removing of a bnx2x virtual function
the following were added:
1. add bnx2x_vfpf.h: defines the VF to PF channel
2. add bnx2x_sriov.h: header for bnx2x SR-IOV functionality
3. enumerate VF hw types (identify VFs)
4. if driving a VF, map VF bar
5. if driving a VF, allocate Vf to PF channel
6. refactor interrupt flows to include VF

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |   23 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   |   27 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h   |    2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |  391 +++++++++++++--------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h   |    9 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |   27 ++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h  |   37 ++
 7 files changed, 363 insertions(+), 153 deletions(-)
 create mode 100644 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
 create mode 100644 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index de121cc..40db837 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -50,6 +50,14 @@
 #include "bnx2x_dcb.h"
 #include "bnx2x_stats.h"
 
+enum bnx2x_int_mode {
+	BNX2X_INT_MODE_MSIX,
+	BNX2X_INT_MODE_INTX,
+	BNX2X_INT_MODE_MSI
+};
+
+
+
 /* error/debug prints */
 
 #define DRV_MODULE_NAME		"bnx2x"
@@ -955,6 +963,9 @@ struct bnx2x_port {
 extern struct workqueue_struct *bnx2x_wq;
 
 #define BNX2X_MAX_NUM_OF_VFS	64
+#define BNX2X_VF_CID_WND	0
+#define BNX2X_CIDS_PER_VF	(1 << BNX2X_VF_CID_WND)
+#define BNX2X_VF_CIDS		(BNX2X_MAX_NUM_OF_VFS * BNX2X_CIDS_PER_VF)
 #define BNX2X_VF_ID_INVALID	0xFF
 
 /*
@@ -1231,6 +1242,10 @@ struct bnx2x {
 	  (vn) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2  : 1))
 #define BP_FW_MB_IDX(bp)		BP_FW_MB_IDX_VN(bp, BP_VN(bp))
 
+	/* vf pf channel mailbox contains request and response buffers */
+	struct bnx2x_vf_mbx_msg	*vf2pf_mbox;
+	dma_addr_t		vf2pf_mbox_mapping;
+
 	struct net_device	*dev;
 	struct pci_dev		*pdev;
 
@@ -1318,8 +1333,6 @@ struct bnx2x {
 #define DISABLE_MSI_FLAG		(1 << 7)
 #define TPA_ENABLE_FLAG			(1 << 8)
 #define NO_MCP_FLAG			(1 << 9)
-
-#define BP_NOMCP(bp)			(bp->flags & NO_MCP_FLAG)
 #define GRO_ENABLE_FLAG			(1 << 10)
 #define MF_FUNC_DIS			(1 << 11)
 #define OWN_CNIC_IRQ			(1 << 12)
@@ -1330,6 +1343,11 @@ struct bnx2x {
 #define BC_SUPPORTS_FCOE_FEATURES	(1 << 19)
 #define USING_SINGLE_MSIX_FLAG		(1 << 20)
 #define BC_SUPPORTS_DCBX_MSG_NON_PMF	(1 << 21)
+#define IS_VF_FLAG			(1 << 22)
+
+#define BP_NOMCP(bp)			(bp->flags & NO_MCP_FLAG)
+#define IS_VF(bp)			(bp->flags & IS_VF_FLAG)
+#define IS_PF(bp)			(!(bp->flags & IS_VF_FLAG))
 
 #define NO_ISCSI(bp)		((bp)->flags & NO_ISCSI_FLAG)
 #define NO_ISCSI_OOO(bp)	((bp)->flags & NO_ISCSI_OOO_FLAG)
@@ -1431,6 +1449,7 @@ struct bnx2x {
 	u8			igu_sb_cnt;
 	u8			min_msix_vec_cnt;
 
+	u32			igu_base_addr;
 	dma_addr_t		def_status_blk_mapping;
 
 	struct bnx2x_slowpath	*slowpath;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 54d522d..73dbe0a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1426,12 +1426,17 @@ void bnx2x_free_irq(struct bnx2x *bp)
 
 int bnx2x_enable_msix(struct bnx2x *bp)
 {
-	int msix_vec = 0, i, rc, req_cnt;
+	int msix_vec = 0, i, rc;
 
-	bp->msix_table[msix_vec].entry = msix_vec;
-	BNX2X_DEV_INFO("msix_table[0].entry = %d (slowpath)\n",
-	   bp->msix_table[0].entry);
-	msix_vec++;
+	DP(BNX2X_MSG_SP, "BNX2X_NUM_ETH_QUEUES %d\n", BNX2X_NUM_ETH_QUEUES(bp));
+
+	/* VFs don't have default status blocks */
+	if (IS_PF(bp)) {
+		bp->msix_table[msix_vec].entry = msix_vec;
+		BNX2X_DEV_INFO("msix_table[0].entry = %d (slowpath)\n",
+			       bp->msix_table[0].entry);
+		msix_vec++;
+	}
 
 	/* Cnic requires an msix vector for itself */
 	if (CNIC_SUPPORT(bp)) {
@@ -1449,9 +1454,10 @@ int bnx2x_enable_msix(struct bnx2x *bp)
 		msix_vec++;
 	}
 
-	req_cnt = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_SUPPORT(bp) + 1;
+	DP(BNX2X_MSG_SP, "about to request enable msix with %d vectors",
+	   msix_vec);
 
-	rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], req_cnt);
+	rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], msix_vec);
 
 	/*
 	 * reconfigure number of tx/rx queues according to available
@@ -1459,7 +1465,7 @@ int bnx2x_enable_msix(struct bnx2x *bp)
 	 */
 	if (rc >= BNX2X_MIN_MSIX_VEC_CNT(bp)) {
 		/* how less vectors we will have? */
-		int diff = req_cnt - rc;
+		int diff = msix_vec - rc;
 
 		BNX2X_DEV_INFO("Trying to use less MSI-X vectors: %d\n", rc);
 
@@ -3884,7 +3890,10 @@ int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp)
 	 * The biggest MSI-X table we might need is as a maximum number of fast
 	 * path IGU SBs plus default SB (for PF).
 	 */
-	msix_table_size = bp->igu_sb_cnt + 1;
+	msix_table_size = bp->igu_sb_cnt;
+	if (IS_PF(bp))
+		msix_table_size++;
+	BNX2X_DEV_INFO("msix_table_size %d", msix_table_size);
 
 	/* fp array: RSS plus CNIC related L2 queues */
 	fp_array_size = BNX2X_MAX_RSS_COUNT(bp) + CNIC_SUPPORT(bp);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index ad28074..c5813c6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -863,7 +863,7 @@ static inline void bnx2x_del_all_napi(struct bnx2x *bp)
 		netif_napi_del(&bnx2x_fp(bp, i, napi));
 }
 
-void bnx2x_set_int_mode(struct bnx2x *bp);
+int bnx2x_set_int_mode(struct bnx2x *bp);
 
 static inline void bnx2x_disable_msi(struct bnx2x *bp)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 3519fed..78392de 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -59,6 +59,8 @@
 #include "bnx2x_init.h"
 #include "bnx2x_init_ops.h"
 #include "bnx2x_cmn.h"
+#include "bnx2x_vfpf.h"
+#include "bnx2x_sriov.h"
 #include "bnx2x_dcb.h"
 #include "bnx2x_sp.h"
 
@@ -133,39 +135,49 @@ enum bnx2x_board_type {
 	BCM57711E,
 	BCM57712,
 	BCM57712_MF,
+	BCM57712_VF,
 	BCM57800,
 	BCM57800_MF,
+	BCM57800_VF,
 	BCM57810,
 	BCM57810_MF,
-	BCM57840_O,
+	BCM57810_VF,
 	BCM57840_4_10,
 	BCM57840_2_20,
-	BCM57840_MFO,
 	BCM57840_MF,
+	BCM57840_VF,
 	BCM57811,
-	BCM57811_MF
+	BCM57811_MF,
+	BCM57840_O,
+	BCM57840_MFO,
+	BCM57811_VF
 };
 
 /* indexed by board_type, above */
 static struct {
 	char *name;
 } board_info[] __devinitdata = {
-	{ "Broadcom NetXtreme II BCM57710 10 Gigabit PCIe [Everest]" },
-	{ "Broadcom NetXtreme II BCM57711 10 Gigabit PCIe" },
-	{ "Broadcom NetXtreme II BCM57711E 10 Gigabit PCIe" },
-	{ "Broadcom NetXtreme II BCM57712 10 Gigabit Ethernet" },
-	{ "Broadcom NetXtreme II BCM57712 10 Gigabit Ethernet Multi Function" },
-	{ "Broadcom NetXtreme II BCM57800 10 Gigabit Ethernet" },
-	{ "Broadcom NetXtreme II BCM57800 10 Gigabit Ethernet Multi Function" },
-	{ "Broadcom NetXtreme II BCM57810 10 Gigabit Ethernet" },
-	{ "Broadcom NetXtreme II BCM57810 10 Gigabit Ethernet Multi Function" },
-	{ "Broadcom NetXtreme II BCM57840 10/20 Gigabit Ethernet" },
-	{ "Broadcom NetXtreme II BCM57840 10 Gigabit Ethernet" },
-	{ "Broadcom NetXtreme II BCM57840 20 Gigabit Ethernet" },
-	{ "Broadcom NetXtreme II BCM57840 10/20 Gigabit Ethernet Multi Function"},
-	{ "Broadcom NetXtreme II BCM57840 10/20 Gigabit Ethernet Multi Function"},
-	{ "Broadcom NetXtreme II BCM57811 10 Gigabit Ethernet"},
-	{ "Broadcom NetXtreme II BCM57811 10 Gigabit Ethernet Multi Function"},
+	[BCM57710]	= { "Broadcom NetXtreme II BCM57710 10 Gigabit PCIe [Everest]" },
+	[BCM57711]	= { "Broadcom NetXtreme II BCM57711 10 Gigabit PCIe" },
+	[BCM57711E]	= { "Broadcom NetXtreme II BCM57711E 10 Gigabit PCIe" },
+	[BCM57712]	= { "Broadcom NetXtreme II BCM57712 10 Gigabit Ethernet" },
+	[BCM57712_MF]	= { "Broadcom NetXtreme II BCM57712 10 Gigabit Ethernet Multi Function" },
+	[BCM57712_VF]	= { "Broadcom NetXtreme II BCM57712 10 Gigabit Ethernet Virtual Function" },
+	[BCM57800]	= { "Broadcom NetXtreme II BCM57800 10 Gigabit Ethernet" },
+	[BCM57800_MF]	= { "Broadcom NetXtreme II BCM57800 10 Gigabit Ethernet Multi Function" },
+	[BCM57800_VF]	= { "Broadcom NetXtreme II BCM57800 10 Gigabit Ethernet Virtual Function" },
+	[BCM57810]	= { "Broadcom NetXtreme II BCM57810 10 Gigabit Ethernet" },
+	[BCM57810_MF]	= { "Broadcom NetXtreme II BCM57810 10 Gigabit Ethernet Multi Function" },
+	[BCM57810_VF]	= { "Broadcom NetXtreme II BCM57810 10 Gigabit Ethernet Virtual Function" },
+	[BCM57840_4_10]	= { "Broadcom NetXtreme II BCM57840 10 Gigabit Ethernet" },
+	[BCM57840_2_20]	= { "Broadcom NetXtreme II BCM57840 20 Gigabit Ethernet" },
+	[BCM57840_MF]	= { "Broadcom NetXtreme II BCM57840 10/20 Gigabit Ethernet Multi Function" },
+	[BCM57840_VF]	= { "Broadcom NetXtreme II BCM57840 10/20 Gigabit Ethernet Virtual Function" },
+	[BCM57811]	= { "Broadcom NetXtreme II BCM57811 10 Gigabit Ethernet" },
+	[BCM57811_MF]	= { "Broadcom NetXtreme II BCM57811 10 Gigabit Ethernet Multi Function" },
+	[BCM57840_O]	= { "Broadcom NetXtreme II BCM57840 10/20 Gigabit Ethernet" },
+	[BCM57840_MFO]	= { "Broadcom NetXtreme II BCM57840 10/20 Gigabit Ethernet Multi Function" },
+	[BCM57811_VF]	= { "Broadcom NetXtreme II BCM57840 10/20 Gigabit Ethernet Virtual Function" }
 };
 
 #ifndef PCI_DEVICE_ID_NX2_57710
@@ -7774,41 +7786,49 @@ int bnx2x_setup_leading(struct bnx2x *bp)
  *
  * In case of MSI-X it will also try to enable MSI-X.
  */
-void bnx2x_set_int_mode(struct bnx2x *bp)
+int bnx2x_set_int_mode(struct bnx2x *bp)
 {
+	int rc = 0;
+
+	if (IS_VF(bp) && int_mode != BNX2X_INT_MODE_MSIX)
+		return -EINVAL;
+
 	switch (int_mode) {
-	case INT_MODE_MSI:
+	case BNX2X_INT_MODE_MSIX:
+		/* attempt to enable msix */
+		rc = bnx2x_enable_msix(bp);
+
+		/* msix attained */
+		if (!rc)
+			return 0;
+
+		/* vfs use only msix */
+		if (rc && IS_VF(bp))
+			return rc;
+
+		/* failed to enable multiple MSI-X */
+		BNX2X_DEV_INFO("Failed to enable multiple MSI-X (%d), set number of queues to %d\n",
+			       bp->num_queues,
+			       1 + bp->num_cnic_queues);
+
+		/* falling through... */
+	case BNX2X_INT_MODE_MSI:
 		bnx2x_enable_msi(bp);
+
 		/* falling through... */
-	case INT_MODE_INTx:
+	case BNX2X_INT_MODE_INTX:
 		bp->num_ethernet_queues = 1;
 		bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
 		BNX2X_DEV_INFO("set number of queues to 1\n");
 		break;
 	default:
-		/* if we can't use MSI-X we only need one fp,
-		 * so try to enable MSI-X with the requested number of fp's
-		 * and fallback to MSI or legacy INTx with one fp
-		 */
-		if (bnx2x_enable_msix(bp) ||
-		    bp->flags & USING_SINGLE_MSIX_FLAG) {
-			/* failed to enable multiple MSI-X */
-			BNX2X_DEV_INFO("Failed to enable multiple MSI-X (%d), set number of queues to %d\n",
-				       bp->num_queues,
-				       1 + bp->num_cnic_queues);
-
-			bp->num_queues = 1 + bp->num_cnic_queues;
-
-			/* Try to enable MSI */
-			if (!(bp->flags & USING_SINGLE_MSIX_FLAG) &&
-			    !(bp->flags & DISABLE_MSI_FLAG))
-				bnx2x_enable_msi(bp);
-		}
-		break;
+		BNX2X_DEV_INFO("unknown value in int_mode module parameter\n");
+		return -EINVAL;
 	}
+	return 0;
 }
 
-/* must be called prioir to any HW initializations */
+/* must be called prior to any HW initializations */
 static inline u16 bnx2x_cid_ilt_lines(struct bnx2x *bp)
 {
 	return L2_ILT_LINES(bp);
@@ -11003,9 +11023,13 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
 	INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task);
 	INIT_DELAYED_WORK(&bp->sp_rtnl_task, bnx2x_sp_rtnl_task);
 	INIT_DELAYED_WORK(&bp->period_task, bnx2x_period_task);
-	rc = bnx2x_get_hwinfo(bp);
-	if (rc)
-		return rc;
+	if (IS_PF(bp)) {
+		rc = bnx2x_get_hwinfo(bp);
+		if (rc)
+			return rc;
+	} else {
+		random_ether_addr(bp->dev->dev_addr);
+	}
 
 	bnx2x_set_modes_bitmap(bp);
 
@@ -11018,7 +11042,7 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
 	func = BP_FUNC(bp);
 
 	/* need to reset chip if undi was active */
-	if (!BP_NOMCP(bp)) {
+	if (IS_PF(bp) && !BP_NOMCP(bp)) {
 		/* init fw_seq */
 		bp->fw_seq =
 			SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
@@ -11055,6 +11079,8 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
 	bp->mrrs = mrrs;
 
 	bp->tx_ring_size = IS_MF_FCOE_AFEX(bp) ? 0 : MAX_TX_AVAIL;
+	if (IS_VF(bp))
+		bp->rx_ring_size = MAX_RX_AVAIL;
 
 	/* make sure that the numbers are in the right granularity */
 	bp->tx_ticks = (50 / BNX2X_BTR) * BNX2X_BTR;
@@ -11078,10 +11104,16 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
 	/* multiple tx priority */
 	if (CHIP_IS_E1x(bp))
 		bp->max_cos = BNX2X_MULTI_TX_COS_E1X;
-	if (CHIP_IS_E2(bp) || CHIP_IS_E3A0(bp))
+	else if (CHIP_IS_E2(bp) || CHIP_IS_E3A0(bp))
 		bp->max_cos = BNX2X_MULTI_TX_COS_E2_E3A0;
-	if (CHIP_IS_E3B0(bp))
+	else if (CHIP_IS_E3B0(bp))
 		bp->max_cos = BNX2X_MULTI_TX_COS_E3B0;
+	else if (IS_VF(bp))
+		bp->max_cos = 1;
+	else
+		BNX2X_ERR("unknown chip %x revision %x\n",
+			  CHIP_NUM(bp), CHIP_REV(bp));
+	pr_info("set bp->max_cos to %d\n", bp->max_cos);
 
 	/* We need at least one default status block for slow-path events,
 	 * second status block for the L2 queue, and a third status block for
@@ -11466,11 +11498,11 @@ static int bnx2x_set_coherency_mask(struct bnx2x *bp)
 	return 0;
 }
 
-static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
+static int __devinit bnx2x_init_dev(struct bnx2x *bp,
+				    struct pci_dev *pdev,
 				    struct net_device *dev,
 				    unsigned long board_type)
 {
-	struct bnx2x *bp;
 	int rc;
 	u32 pci_cfg_dword;
 	bool chip_is_e1x = (board_type == BCM57710 ||
@@ -11478,11 +11510,9 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
 			    board_type == BCM57711E);
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
-	bp = netdev_priv(dev);
 
 	bp->dev = dev;
 	bp->pdev = pdev;
-	bp->flags = 0;
 
 	rc = pci_enable_device(pdev);
 	if (rc) {
@@ -11498,9 +11528,8 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
 		goto err_out_disable;
 	}
 
-	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
-		dev_err(&bp->pdev->dev, "Cannot find second PCI device"
-		       " base address, aborting\n");
+	if (IS_PF(bp) && !(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
+		dev_err(&bp->pdev->dev, "Cannot find second PCI device base address, aborting\n");
 		rc = -ENODEV;
 		goto err_out_disable;
 	}
@@ -11517,12 +11546,14 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
 		pci_save_state(pdev);
 	}
 
-	bp->pm_cap = pci_find_capability(pdev, PCI_CAP_ID_PM);
-	if (bp->pm_cap == 0) {
-		dev_err(&bp->pdev->dev,
-			"Cannot find power management capability, aborting\n");
-		rc = -EIO;
-		goto err_out_release;
+	if (IS_PF(bp)) {
+		bp->pm_cap = pci_find_capability(pdev, PCI_CAP_ID_PM);
+		if (bp->pm_cap == 0) {
+			dev_err(&bp->pdev->dev,
+				"Cannot find power management capability, aborting\n");
+			rc = -EIO;
+			goto err_out_release;
+		}
 	}
 
 	if (!pci_is_pcie(pdev)) {
@@ -11573,25 +11604,28 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
 	 * Clean the following indirect addresses for all functions since it
 	 * is not used by the driver.
 	 */
-	REG_WR(bp, PXP2_REG_PGL_ADDR_88_F0, 0);
-	REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F0, 0);
-	REG_WR(bp, PXP2_REG_PGL_ADDR_90_F0, 0);
-	REG_WR(bp, PXP2_REG_PGL_ADDR_94_F0, 0);
+	if (IS_PF(bp)) {
+		REG_WR(bp, PXP2_REG_PGL_ADDR_88_F0, 0);
+		REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F0, 0);
+		REG_WR(bp, PXP2_REG_PGL_ADDR_90_F0, 0);
+		REG_WR(bp, PXP2_REG_PGL_ADDR_94_F0, 0);
+
+		if (chip_is_e1x) {
+			REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0);
+			REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0);
+			REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0);
+			REG_WR(bp, PXP2_REG_PGL_ADDR_94_F1, 0);
+		}
 
-	if (chip_is_e1x) {
-		REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0);
-		REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0);
-		REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0);
-		REG_WR(bp, PXP2_REG_PGL_ADDR_94_F1, 0);
+		/* Enable internal target-read (in case we are probed after PF
+		 * FLR). Must be done prior to any BAR read access. Only for
+		 * 57712 and up
+		 */
+		if (!chip_is_e1x)
+			REG_WR(bp,
+			       PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1);
 	}
 
-	/*
-	 * Enable internal target-read (in case we are probed after PF FLR).
-	 * Must be done prior to any BAR read access. Only for 57712 and up
-	 */
-	if (!chip_is_e1x)
-		REG_WR(bp, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1);
-
 	dev->watchdog_timeo = TX_TIMEOUT;
 
 	dev->netdev_ops = &bnx2x_netdev_ops;
@@ -11643,7 +11677,8 @@ err_out:
 static void __devinit bnx2x_get_pcie_width_speed(struct bnx2x *bp,
 						 int *width, int *speed)
 {
-	u32 val = REG_RD(bp, PCICFG_OFFSET + PCICFG_LINK_CONTROL);
+	u32 val;
+	pci_read_config_dword(bp->pdev, PCICFG_LINK_CONTROL, &val);
 
 	*width = (val & PCICFG_LINK_WIDTH) >> PCICFG_LINK_WIDTH_SHIFT;
 
@@ -11908,7 +11943,6 @@ void bnx2x__init_func_obj(struct bnx2x *bp)
 static int bnx2x_set_qm_cid_count(struct bnx2x *bp)
 {
 	int cid_count = BNX2X_L2_MAX_CID(bp);
-
 	if (CNIC_SUPPORT(bp))
 		cid_count += CNIC_CID_MAX;
 	return roundup(cid_count, QM_CID_ROUND);
@@ -11921,9 +11955,9 @@ static int bnx2x_set_qm_cid_count(struct bnx2x *bp)
  *
  */
 static int bnx2x_get_num_non_def_sbs(struct pci_dev *pdev,
-				     int cnic_cnt)
+				     int cnic_cnt, bool is_vf)
 {
-	int pos;
+	int pos, index;
 	u16 control;
 
 	pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
@@ -11932,88 +11966,124 @@ static int bnx2x_get_num_non_def_sbs(struct pci_dev *pdev,
 	 * If MSI-X is not supported - return number of SBs needed to support
 	 * one fast path queue: one FP queue + SB for CNIC
 	 */
-	if (!pos)
+	if (!pos) {
+		pr_err("no msix");
 		return 1 + cnic_cnt;
+	}
+
+	pr_info("msix capability found");
 
 	/*
 	 * The value in the PCI configuration space is the index of the last
 	 * entry, namely one less than the actual size of the table, which is
 	 * exactly what we want to return from this function: number of all SBs
 	 * without the default SB.
+	 * For VFs there is no default SB, then we return (index+1).
 	 */
 	pci_read_config_word(pdev, pos  + PCI_MSI_FLAGS, &control);
-	return control & PCI_MSIX_FLAGS_QSIZE;
+
+	index = control & PCI_MSIX_FLAGS_QSIZE;
+
+	return is_vf ? index + 1 : index;
 }
 
-static int __devinit bnx2x_init_one(struct pci_dev *pdev,
-				    const struct pci_device_id *ent)
+static int set_max_cos_est(int chip_id)
 {
-	struct net_device *dev = NULL;
-	struct bnx2x *bp;
-	int pcie_width, pcie_speed;
-	int rc, max_non_def_sbs;
-	int rx_count, tx_count, rss_count, doorbell_size;
-	int cnic_cnt;
-	/*
-	 * An estimated maximum supported CoS number according to the chip
-	 * version.
-	 * We will try to roughly estimate the maximum number of CoSes this chip
-	 * may support in order to minimize the memory allocated for Tx
-	 * netdev_queue's. This number will be accurately calculated during the
-	 * initialization of bp->max_cos based on the chip versions AND chip
-	 * revision in the bnx2x_init_bp().
-	 */
-	u8 max_cos_est = 0;
-
-	switch (ent->driver_data) {
+	switch (chip_id) {
 	case BCM57710:
 	case BCM57711:
 	case BCM57711E:
-		max_cos_est = BNX2X_MULTI_TX_COS_E1X;
-		break;
-
+		return BNX2X_MULTI_TX_COS_E1X;
 	case BCM57712:
 	case BCM57712_MF:
-		max_cos_est = BNX2X_MULTI_TX_COS_E2_E3A0;
-		break;
-
+	case BCM57712_VF:
+		return BNX2X_MULTI_TX_COS_E2_E3A0;
 	case BCM57800:
 	case BCM57800_MF:
+	case BCM57800_VF:
 	case BCM57810:
 	case BCM57810_MF:
-	case BCM57840_O:
 	case BCM57840_4_10:
 	case BCM57840_2_20:
+	case BCM57840_O:
 	case BCM57840_MFO:
+	case BCM57810_VF:
 	case BCM57840_MF:
+	case BCM57840_VF:
 	case BCM57811:
 	case BCM57811_MF:
-		max_cos_est = BNX2X_MULTI_TX_COS_E3B0;
-		break;
-
+	case BCM57811_VF:
+		return BNX2X_MULTI_TX_COS_E3B0;
+		return 1;
 	default:
-		pr_err("Unknown board_type (%ld), aborting\n",
-			   ent->driver_data);
+		pr_err("Unknown board_type (%d), aborting\n",
+		       chip_id);
 		return -ENODEV;
 	}
+}
+
+static int set_is_vf(int chip_id)
+{
+	switch (chip_id) {
+	case BCM57712_VF:
+	case BCM57800_VF:
+	case BCM57810_VF:
+	case BCM57840_VF:
+	case BCM57811_VF:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int __devinit bnx2x_init_one(struct pci_dev *pdev,
+				    const struct pci_device_id *ent)
+{
+	struct net_device *dev = NULL;
+	struct bnx2x *bp;
+	int pcie_width, pcie_speed;
+	int rc, max_non_def_sbs;
+	int rx_count, tx_count, rss_count, doorbell_size;
+	int max_cos_est;
+	bool is_vf;
+	int cnic_cnt;
 
-	cnic_cnt = 1;
-	max_non_def_sbs = bnx2x_get_num_non_def_sbs(pdev, cnic_cnt);
+	/* An estimated maximum supported CoS number according to the chip
+	 * version.
+	 * We will try to roughly estimate the maximum number of CoSes this chip
+	 * may support in order to minimize the memory allocated for Tx
+	 * netdev_queue's. This number will be accurately calculated during the
+	 * initialization of bp->max_cos based on the chip versions AND chip
+	 * revision in the bnx2x_init_bp().
+	 */
+	max_cos_est = set_max_cos_est(ent->driver_data);
+	if (max_cos_est < 0)
+		return max_cos_est;
+	is_vf = set_is_vf(ent->driver_data);
+	cnic_cnt = is_vf ? 0 : 1;
 
-	WARN_ON(!max_non_def_sbs);
+	max_non_def_sbs = bnx2x_get_num_non_def_sbs(pdev, cnic_cnt, is_vf);
 
 	/* Maximum number of RSS queues: one IGU SB goes to CNIC */
 	rss_count = max_non_def_sbs - cnic_cnt;
 
+	if (rss_count < 1)
+		return -EINVAL;
+
 	/* Maximum number of netdev Rx queues: RSS + FCoE L2 */
 	rx_count = rss_count + cnic_cnt;
 
-	/*
-	 * Maximum number of netdev Tx queues:
+	/* Maximum number of netdev Tx queues:
 	 * Maximum TSS queues * Maximum supported number of CoS  + FCoE L2
 	 */
 	tx_count = rss_count * max_cos_est + cnic_cnt;
 
+	/* no multi queue support for vfs. for now... */
+	if (is_vf) {
+		tx_count = 1;
+		rx_count = 1;
+	}
+
 	/* dev zeroed in init_etherdev */
 	dev = alloc_etherdev_mqs(sizeof(*bp), tx_count, rx_count);
 	if (!dev)
@@ -12021,21 +12091,27 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 
 	bp = netdev_priv(dev);
 
+	bp->flags = 0;
+	if (is_vf)
+		bp->flags |= IS_VF_FLAG;
+
 	bp->igu_sb_cnt = max_non_def_sbs;
+	bp->igu_base_addr = IS_VF(bp) ? PXP_VF_ADDR_IGU_START : BAR_IGU_INTMEM;
 	bp->msg_enable = debug;
 	bp->cnic_support = cnic_cnt;
 
 	pci_set_drvdata(pdev, dev);
 
-	rc = bnx2x_init_dev(pdev, dev, ent->driver_data);
+	rc = bnx2x_init_dev(bp, pdev, dev, ent->driver_data);
 	if (rc < 0) {
 		free_netdev(dev);
 		return rc;
 	}
 
+	BNX2X_DEV_INFO("This is a %s function\n",
+		       IS_PF(bp) ? "physical" : "virtual");
 	BNX2X_DEV_INFO("Cnic support is %s\n", CNIC_SUPPORT(bp) ? "on" : "off");
-	BNX2X_DEV_INFO("max_non_def_sbs %d\n", max_non_def_sbs);
-
+	BNX2X_DEV_INFO("Max num of status blocks %d\n", max_non_def_sbs);
 	BNX2X_DEV_INFO("Allocated netdev with %d tx and %d rx queues\n",
 			  tx_count, rx_count);
 
@@ -12044,18 +12120,28 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 		goto init_one_exit;
 
 	/*
-	 * Map doorbels here as we need the real value of bp->max_cos which
-	 * is initialized in bnx2x_init_bp().
+	 * Map doorbells here as we need the real value of bp->max_cos which
+	 * is initialized in bnx2x_init_bp() to determine the number of
+	 * l2 connections.
 	 */
-	doorbell_size = BNX2X_L2_MAX_CID(bp) * (1 << BNX2X_DB_SHIFT);
-	if (doorbell_size > pci_resource_len(pdev, 2)) {
-		dev_err(&bp->pdev->dev,
-			"Cannot map doorbells, bar size too small, aborting\n");
-		rc = -ENOMEM;
-		goto init_one_exit;
+	if (IS_VF(bp)) {
+		/* vf doorbells are embedded within the regview */
+		bp->doorbells = bp->regview + PXP_VF_ADDR_DB_START;
+
+		/* allocate vf2pf mailbox for vf to pf channel */
+		BNX2X_PCI_ALLOC(bp->vf2pf_mbox, &bp->vf2pf_mbox_mapping,
+				sizeof(struct bnx2x_vf_mbx_msg));
+	} else {
+		doorbell_size = BNX2X_L2_MAX_CID(bp) * (1 << BNX2X_DB_SHIFT);
+		if (doorbell_size > pci_resource_len(pdev, 2)) {
+			dev_err(&bp->pdev->dev,
+				"Cannot map doorbells, bar size too small, aborting\n");
+			rc = -ENOMEM;
+			goto init_one_exit;
+		}
+		bp->doorbells = ioremap_nocache(pci_resource_start(pdev, 2),
+						doorbell_size);
 	}
-	bp->doorbells = ioremap_nocache(pci_resource_start(pdev, 2),
-					doorbell_size);
 	if (!bp->doorbells) {
 		dev_err(&bp->pdev->dev,
 			"Cannot map doorbell space, aborting\n");
@@ -12065,6 +12151,7 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 
 	/* calc qm_cid_count */
 	bp->qm_cid_count = bnx2x_set_qm_cid_count(bp);
+	BNX2X_DEV_INFO("qm_cid_count %d\n", bp->qm_cid_count);
 
 	/* disable FCOE L2 queue for E1x*/
 	if (CHIP_IS_E1x(bp))
@@ -12086,13 +12173,21 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 	/* Configure interrupt mode: try to enable MSI-X/MSI if
 	 * needed.
 	 */
-	bnx2x_set_int_mode(bp);
+	rc = bnx2x_set_int_mode(bp);
+	if (rc) {
+		dev_err(&pdev->dev, "Cannot set interrupts\n");
+		goto init_one_exit;
+	}
+	BNX2X_DEV_INFO("set interrupts successfully");
 
+
+	/* register the net device */
 	rc = register_netdev(dev);
 	if (rc) {
 		dev_err(&pdev->dev, "Cannot register net device\n");
 		goto init_one_exit;
 	}
+	BNX2X_DEV_INFO("device name after netdev register %s\n", dev->name);
 
 
 	if (!NO_FCOE(bp)) {
@@ -12103,6 +12198,8 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 	}
 
 	bnx2x_get_pcie_width_speed(bp, &pcie_width, &pcie_speed);
+	BNX2X_DEV_INFO("got pcie width %d and speed %d\n",
+		       pcie_width, pcie_speed);
 
 	BNX2X_DEV_INFO(
 		"%s (%c%d) PCI-E x%d %s found at mem %lx, IRQ %d, node addr %pM\n",
@@ -12116,11 +12213,16 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 
 	return 0;
 
+alloc_mem_err:
+	BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->vf2pf_mbox_mapping,
+		       sizeof(struct bnx2x_vf_mbx_msg));
+	rc = -ENOMEM;
+
 init_one_exit:
 	if (bp->regview)
 		iounmap(bp->regview);
 
-	if (bp->doorbells)
+	if (IS_PF(bp) && bp->doorbells)
 		iounmap(bp->doorbells);
 
 	free_netdev(dev);
@@ -12160,13 +12262,15 @@ static void __devexit bnx2x_remove_one(struct pci_dev *pdev)
 	unregister_netdev(dev);
 
 	/* Power on: we can't let PCI layer write to us while we are in D3 */
-	bnx2x_set_power_state(bp, PCI_D0);
+	if (IS_PF(bp))
+		bnx2x_set_power_state(bp, PCI_D0);
 
 	/* Disable MSI/MSI-X */
 	bnx2x_disable_msi(bp);
 
 	/* Power off */
-	bnx2x_set_power_state(bp, PCI_D3hot);
+	if (IS_PF(bp))
+		bnx2x_set_power_state(bp, PCI_D3hot);
 
 	/* Make sure RESET task is not scheduled before continuing */
 	cancel_delayed_work_sync(&bp->sp_rtnl_task);
@@ -12174,11 +12278,15 @@ static void __devexit bnx2x_remove_one(struct pci_dev *pdev)
 	if (bp->regview)
 		iounmap(bp->regview);
 
-	if (bp->doorbells)
-		iounmap(bp->doorbells);
-
-	bnx2x_release_firmware(bp);
+	/* for vf doorbells are part of the regview and were unmapped along with
+	 * it. FW is only loaded by PF.
+	 */
+	if (IS_PF(bp)) {
+		if (bp->doorbells)
+			iounmap(bp->doorbells);
 
+		bnx2x_release_firmware(bp);
+	}
 	bnx2x_free_mem_bp(bp);
 
 	free_netdev(dev);
@@ -12188,6 +12296,7 @@ static void __devexit bnx2x_remove_one(struct pci_dev *pdev)
 
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
+	pr_info("rmmod complete");
 }
 
 static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 7d93adb..be8c2c4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -6538,6 +6538,15 @@
 	(7L<<ME_REG_ABS_PF_NUM_SHIFT) /* Absolute PF Num */
 
 
+#define PXP_VF_ADDR_IGU_START		0
+#define PXP_VF_ADDR_IGU_SIZE		(0x3000)
+#define PXP_VF_ADDR_IGU_END\
+	((PXP_VF_ADDR_IGU_START) + (PXP_VF_ADDR_IGU_SIZE) - 1)
+#define PXP_VF_ADDR_DB_START				0x7c00
+#define PXP_VF_ADDR_DB_SIZE					(0x200)
+#define PXP_VF_ADDR_DB_END\
+	((PXP_VF_ADDR_DB_START) + (PXP_VF_ADDR_DB_SIZE) - 1)
+
 #define MDIO_REG_BANK_CL73_IEEEB0	0x0
 #define MDIO_CL73_IEEEB0_CL73_AN_CONTROL	0x0
 #define MDIO_CL73_IEEEB0_CL73_AN_CONTROL_RESTART_AN	0x0200
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
new file mode 100644
index 0000000..181f30f
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -0,0 +1,27 @@
+/* bnx2x_sriov.h: Broadcom Everest network driver.
+ *
+ * Copyright 2009-2012 Broadcom Corporation
+ *
+ * Unless you and Broadcom execute a separate written software license
+ * agreement governing use of this software, this software is licensed to you
+ * under the terms of the GNU General Public License version 2, available
+ * at http://www.gnu.org/licenses/old-licenses/gpl-2.0.html (the "GPL").
+ *
+ * Notwithstanding the above, under no circumstances may you combine this
+ * software in any way with any other Broadcom software provided under a
+ * license other than the GPL, without Broadcom's express prior written
+ * consent.
+ *
+ * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ * Written by: Shmulik Ravid
+ *
+ */
+#ifndef BNX2X_SRIOV_H
+#define BNX2X_SRIOV_H
+
+struct bnx2x_vf_mbx_msg {
+	union vfpf_tlvs req;
+	union pfvf_tlvs resp;
+};
+
+#endif /* bnx2x_sriov.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
new file mode 100644
index 0000000..82edebb
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.h
@@ -0,0 +1,37 @@
+/* bnx2x_vfpf.h: Broadcom Everest network driver.
+ *
+ * Copyright (c) 2011-2012 Broadcom Corporation
+ *
+ * Unless you and Broadcom execute a separate written software license
+ * agreement governing use of this software, this software is licensed to you
+ * under the terms of the GNU General Public License version 2, available
+ * at http://www.gnu.org/licenses/old-licenses/gpl-2.0.html (the "GPL").
+ *
+ * Notwithstanding the above, under no circumstances may you combine this
+ * software in any way with any other Broadcom software provided under a
+ * license other than the GPL, without Broadcom's express prior written
+ * consent.
+ *
+ * Maintained by: Eilon Greenstein <eilong@broadcom.com>
+ *
+ */
+#ifndef VF_PF_IF_H
+#define VF_PF_IF_H
+
+/* HW VF-PF channel definitions
+ * A.K.A VF-PF mailbox
+ */
+#define TLV_BUFFER_SIZE			1024
+
+struct tlv_buffer_size {
+	u8 tlv_buffer[TLV_BUFFER_SIZE];
+};
+union vfpf_tlvs {
+
+	struct tlv_buffer_size		tlv_buf_size;
+};
+
+union pfvf_tlvs {
+	struct tlv_buffer_size		tlv_buf_size;
+};
+#endif /* VF_PF_IF_H */
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 13/22] bnx2x: Support of PF driver of a VF init request
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

The VF driver will send an 'init' request as part of its nic load
flow. This message is used by the VF to publish the GPA's of its
status blocks, slow path ring and statistics buffer.
The PF driver notes all this down in the VF database, and also uses
this message to transfer the VF to VF_INIT state internally.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  |    2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h   |    4 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  159 +++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h |    7 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |   20 +++
 6 files changed, 193 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 52012f2..4d89ef6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1853,6 +1853,8 @@ int bnx2x_del_all_macs(struct bnx2x *bp,
 
 /* Init Function API  */
 void bnx2x_func_init(struct bnx2x *bp, struct bnx2x_func_init_params *p);
+void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
+			  u8 vf_valid, int fw_sb_id, int igu_sb_id);
 u32 bnx2x_get_pretend_reg(struct bnx2x *bp);
 int bnx2x_get_gpio(struct bnx2x *bp, int gpio_num, u8 port);
 int bnx2x_set_gpio(struct bnx2x *bp, int gpio_num, u32 mode, u8 port);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index cd80552..7c2fc15 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -5402,7 +5402,7 @@ static void bnx2x_map_sb_state_machines(struct hc_index_data *index_data)
 		SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
 }
 
-static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
+void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
 			  u8 vf_valid, int fw_sb_id, int igu_sb_id)
 {
 	int igu_seg_id;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 50d825e..3997f63 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -3726,6 +3726,10 @@
 #define PXP_REG_HST_DISCARD_INTERNAL_WRITES_STATUS		 0x10309c
 /* [WB 160] Used for initialization of the inbound interrupts memory */
 #define PXP_REG_HST_INBOUND_INT 				 0x103800
+/* [RW 7] Indirect access to the permission table. The fields are : {Valid;
+ * VFID[5:0]}
+ */
+#define PXP_REG_HST_ZONE_PERMISSION_TABLE			 0x103400
 /* [RW 32] Interrupt mask register #0 read/write */
 #define PXP_REG_PXP_INT_MASK_0					 0x103074
 #define PXP_REG_PXP_INT_MASK_1					 0x103084
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index b91cb00..6141c73 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -63,6 +63,40 @@ struct bnx2x_virtf *bnx2x_vf_by_abs_fid(struct bnx2x *bp, u16 abs_vfid)
 	return (idx < BNX2X_NR_VIRTFN(bp)) ? BP_VF(bp, idx) : NULL;
 }
 
+static void bnx2x_vf_igu_ack_sb(struct bnx2x *bp, struct bnx2x_virtf *vf,
+				u8 igu_sb_id, u8 segment, u16 index, u8 op,
+				u8 update)
+{
+	/* acking a VF sb through the PF - use the GRC */
+	u32 ctl;
+	u32 igu_addr_data = IGU_REG_COMMAND_REG_32LSB_DATA;
+	u32 igu_addr_ctl = IGU_REG_COMMAND_REG_CTRL;
+	u32 func_encode = vf->abs_vfid;
+	u32 addr_encode = IGU_CMD_E2_PROD_UPD_BASE + igu_sb_id;
+	struct igu_regular cmd_data = {0};
+
+	cmd_data.sb_id_and_flags =
+			((index << IGU_REGULAR_SB_INDEX_SHIFT) |
+			 (segment << IGU_REGULAR_SEGMENT_ACCESS_SHIFT) |
+			 (update << IGU_REGULAR_BUPDATE_SHIFT) |
+			 (op << IGU_REGULAR_ENABLE_INT_SHIFT));
+
+	ctl = addr_encode << IGU_CTRL_REG_ADDRESS_SHIFT		|
+	      func_encode << IGU_CTRL_REG_FID_SHIFT		|
+	      IGU_CTRL_CMD_TYPE_WR << IGU_CTRL_REG_TYPE_SHIFT;
+
+	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
+	   cmd_data.sb_id_and_flags, igu_addr_data);
+	REG_WR(bp, igu_addr_data, cmd_data.sb_id_and_flags);
+	mmiowb();
+	barrier();
+
+	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
+	   ctl, igu_addr_ctl);
+	REG_WR(bp, igu_addr_ctl, ctl);
+	mmiowb();
+	barrier();
+}
 static int bnx2x_ari_enabled(struct pci_dev *dev)
 {
 	return dev->bus->self && dev->bus->self->ari_enabled;
@@ -355,6 +389,52 @@ static void bnx2x_vf_pglue_clear_err(struct bnx2x *bp, u8 abs_vfid)
 	REG_WR(bp, was_err_reg, 1 << (abs_vfid & 0x1f));
 }
 
+static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	int i;
+	u32 val;
+
+	/* Set VF masks and configuration - pretend */
+	bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, vf->abs_vfid));
+
+	REG_WR(bp, IGU_REG_SB_INT_BEFORE_MASK_LSB, 0);
+	REG_WR(bp, IGU_REG_SB_INT_BEFORE_MASK_MSB, 0);
+	REG_WR(bp, IGU_REG_SB_MASK_LSB, 0);
+	REG_WR(bp, IGU_REG_SB_MASK_MSB, 0);
+	REG_WR(bp, IGU_REG_PBA_STATUS_LSB, 0);
+	REG_WR(bp, IGU_REG_PBA_STATUS_MSB, 0);
+
+	val = REG_RD(bp, IGU_REG_VF_CONFIGURATION);
+	val |= (IGU_VF_CONF_FUNC_EN | IGU_VF_CONF_MSI_MSIX_EN);
+	if (vf->cfg_flags & VF_CFG_INT_SIMD)
+		val |= IGU_VF_CONF_SINGLE_ISR_EN;
+	val &= ~IGU_VF_CONF_PARENT_MASK;
+	val |= BP_FUNC(bp) << IGU_VF_CONF_PARENT_SHIFT;	/* parent PF */
+	REG_WR(bp, IGU_REG_VF_CONFIGURATION, val);
+
+	DP(BNX2X_MSG_IOV,
+	   "value in IGU_REG_VF_CONFIGURATION of vf %d after write %x\n",
+	   vf->abs_vfid, REG_RD(bp, IGU_REG_VF_CONFIGURATION));
+
+	bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
+
+	/* iterate over all queues, clear sb consumer */
+	for (i = 0; i < vf_sb_count(vf); i++) {
+		u8 igu_sb_id = vf_igu_sb(vf, i);
+
+		/* zero prod memory */
+		REG_WR(bp, IGU_REG_PROD_CONS_MEMORY + igu_sb_id * 4, 0);
+
+		/* clear sb state machine */
+		bnx2x_igu_clear_sb_gen(bp, vf->abs_vfid, igu_sb_id,
+				       false /* VF */);
+
+		/* disable + update */
+		bnx2x_vf_igu_ack_sb(bp, vf, igu_sb_id, USTORM_ID, 0,
+				    IGU_INT_DISABLE, 1);
+	}
+}
+
 void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
 {
 	/* set the VF-PF association in the FW */
@@ -372,6 +452,17 @@ void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
 	bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
 }
 
+static void bnx2x_vf_enable_traffic(struct bnx2x *bp, struct bnx2x_virtf *vf)
+{
+	/* Reset vf in IGU  interrupts are still disabled */
+	bnx2x_vf_igu_reset(bp, vf);
+
+	/* pretend to enable the vf with the PBF */
+	bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, vf->abs_vfid));
+	REG_WR(bp, PBF_REG_DISABLE_VF, 0);
+	bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
+}
+
 static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid)
 {
 	struct pci_dev *dev;
@@ -988,6 +1079,13 @@ void bnx2x_iov_sp_task(struct bnx2x *bp)
 		}
 	}
 }
+static void bnx2x_vf_qtbl_set_q(struct bnx2x *bp, u8 abs_vfid, u8 qid,
+				u8 enable)
+{
+	u32 reg = PXP_REG_HST_ZONE_PERMISSION_TABLE + qid * 4;
+	u32 val = enable ? (abs_vfid | (1 << 6)) : 0;
+	REG_WR(bp, reg, val);
+}
 
 u8 bnx2x_vf_max_queue_cnt(struct bnx2x *bp, struct bnx2x_virtf *vf)
 {
@@ -1099,6 +1197,67 @@ int bnx2x_vf_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	return 0;
 }
 
+int bnx2x_vf_init(struct bnx2x *bp, struct bnx2x_virtf *vf, dma_addr_t *sb_map)
+{
+	struct bnx2x_func_init_params func_init = {0};
+	u16 flags = 0;
+	int i;
+
+	/* the sb resources are initialized at this point, do the
+	 * FW/HW initializations
+	 */
+	for_each_vf_sb(vf, i)
+		bnx2x_init_sb(bp, (dma_addr_t)sb_map[i], vf->abs_vfid, true,
+			      vf_igu_sb(vf, i), vf_igu_sb(vf, i));
+
+	/* Sanity checks */
+	if (vf->state != VF_ACQUIRED) {
+		DP(BNX2X_MSG_IOV, "VF[%d] is not in VF_ACQUIRED, but %d\n",
+		   vf->abs_vfid, vf->state);
+		return -EINVAL;
+	}
+	/* FLR cleanup epilogue */
+	if (bnx2x_vf_flr_clnup_epilog(bp, vf->abs_vfid))
+		return -EBUSY;
+
+	/* reset IGU VF statistics: MSIX */
+	REG_WR(bp, IGU_REG_STATISTIC_NUM_MESSAGE_SENT + vf->abs_vfid * 4 , 0);
+
+	/* vf init */
+	if (vf->cfg_flags & VF_CFG_STATS)
+		flags |= (FUNC_FLG_STATS | FUNC_FLG_SPQ);
+
+	if (vf->cfg_flags & VF_CFG_TPA)
+		flags |= FUNC_FLG_TPA;
+
+	if (is_vf_multi(vf))
+		flags |= FUNC_FLG_RSS;
+
+	/* function setup */
+	func_init.func_flgs = flags;
+	func_init.pf_id = BP_FUNC(bp);
+	func_init.func_id = FW_VF_HANDLE(vf->abs_vfid);
+	func_init.fw_stat_map = vf->fw_stat_map;
+	func_init.spq_map = vf->spq_map;
+	func_init.spq_prod = 0;
+	bnx2x_func_init(bp, &func_init);
+
+	/* Configure RSS TBD */
+
+	/* Enable the vf */
+	bnx2x_vf_enable_access(bp, vf->abs_vfid);
+	bnx2x_vf_enable_traffic(bp, vf);
+
+	/* queue protection table */
+	for_each_vfq(vf, i)
+		bnx2x_vf_qtbl_set_q(bp, vf->abs_vfid,
+				    vfq_qzone_id(vf, vfq_get(vf, i)), true);
+
+	vf->state = VF_ENABLED;
+
+	return 0;
+}
+
 void bnx2x_lock_vf_pf_channel(struct bnx2x *bp, struct bnx2x_virtf *vf,
 			      enum channel_tlvs tlv)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index a6ae7f0..a3a8240 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -269,6 +269,8 @@ struct bnx2x_virtf {
 #define for_each_vf_sb(vf, var) \
 		for ((var) = 0; (var) < vf_sb_count(vf); (var)++)
 
+#define is_vf_multi(vf)	(vf_rxq_count(vf) > 1)
+
 #define HW_VF_HANDLE(bp, abs_vfid) \
 	(u16)(BP_ABS_FUNC((bp)) | (1<<3) |  ((u16)(abs_vfid) << 4))
 
@@ -431,6 +433,11 @@ void bnx2x_vf_enable_mbx(struct bnx2x *bp, u8 abs_vfid);
 int bnx2x_vf_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
 			  struct vf_pf_resc_request *resc);
 
+/* init */
+int bnx2x_vf_init(struct bnx2x *bp, struct bnx2x_virtf *vf,
+		  dma_addr_t *sb_map);
+
+
 static inline struct bnx2x_vfop *bnx2x_vfop_cur(struct bnx2x *bp,
 						struct bnx2x_virtf *vf)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 01ee50a..1dfe734 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -22,6 +22,8 @@
  */
 #include "bnx2x.h"
 #include "bnx2x_sriov.h"
+#include <linux/crc32.h>
+
 /* place a given tlv on the tlv buffer at a given offset */
 void bnx2x_add_tlv(struct bnx2x *bp, void *tlvs_list, u16 offset, u16 type,
 		   u16 length)
@@ -360,6 +362,21 @@ static void bnx2x_vf_mbx_acquire(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	bnx2x_vf_mbx_acquire_resp(bp, vf, mbx, rc);
 }
 
+static void bnx2x_vf_mbx_init_vf(struct bnx2x *bp, struct bnx2x_virtf *vf,
+			      struct bnx2x_vf_mbx *mbx)
+{
+	struct vfpf_init_tlv *init = &mbx->msg->req.init;
+
+	/* record ghost addresses from vf message */
+	vf->spq_map = init->spq_addr;
+	vf->fw_stat_map = init->stats_addr;
+
+	vf->op_rc = bnx2x_vf_init(bp, vf, (dma_addr_t *)init->sb_addr);
+
+	/* response */
+	bnx2x_vf_mbx_resp(bp, vf);
+}
+
 /* dispatch request */
 static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				  struct bnx2x_vf_mbx *mbx)
@@ -379,6 +396,9 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
 		case CHANNEL_TLV_ACQUIRE:
 			bnx2x_vf_mbx_acquire(bp, vf, mbx);
 			break;
+		case CHANNEL_TLV_INIT:
+			bnx2x_vf_mbx_init_vf(bp, vf, mbx);
+			break;
 		}
 
 	/* unknown TLV - this may belong to a VF driver from the future - a
-- 
1.7.9.GIT

^ permalink raw reply related

* [PATCH v2 net-next 08/22] bnx2x: VF fastpath
From: Ariel Elior @ 2012-11-15 16:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ariel Elior, Eilon Greenstein
In-Reply-To: <1352998067-9707-1-git-send-email-ariele@broadcom.com>

When VF driver is transmitting it must supply the correct mac
address in the parsing BD. This is used for firmware validation
and enforcement and also for tx-switching.
Refactor interrupt ack flow to allow for different BAR addresses of
the hardware in the PF BAR vs the VF BAR.

Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |   20 ++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h  |   68 +++++++++++-----------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |   13 +----
 3 files changed, 52 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 2519d5a..ba51259 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3462,8 +3462,21 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		    cpu_to_le16(vlan_tx_tag_get(skb));
 		tx_start_bd->bd_flags.as_bitfield |=
 		    (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
-	} else
-		tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
+	} else {
+
+		/* when transmitting in a vf, start bd must hold the ethertype
+		 * for fw to enforce it
+		 */
+		if (IS_VF(bp)) {
+			struct ethhdr *hdr = (struct ethhdr *)skb->data;
+			tx_start_bd->vlan_or_ethertype =
+				cpu_to_le16(ntohs(hdr->h_proto));
+		} else {
+			/* used by FW for packet accounting */
+			tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
+
+		}
+	}
 
 	/* turn on parsing and get a BD */
 	bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
@@ -3479,7 +3492,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			hlen = bnx2x_set_pbd_csum_e2(bp, skb,
 						     &pbd_e2_parsing_data,
 						     xmit_type);
-		if (IS_MF_SI(bp)) {
+
+		if (IS_MF_SI(bp) || IS_VF(bp)) {
 			/*
 			 * fill in the MAC addresses in the PBD - for local
 			 * switching
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 2e32a21..948949d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -499,6 +499,39 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc);
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb);
 
+static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
+					struct bnx2x_fastpath *fp,
+					u16 bd_prod, u16 rx_comp_prod,
+					u16 rx_sge_prod)
+{
+	struct ustorm_eth_rx_producers rx_prods = {0};
+	u32 i;
+
+	/* Update producers */
+	rx_prods.bd_prod = bd_prod;
+	rx_prods.cqe_prod = rx_comp_prod;
+	rx_prods.sge_prod = rx_sge_prod;
+
+	/* Make sure that the BD and SGE data is updated before updating the
+	 * producers since FW might read the BD/SGE right after the producer
+	 * is updated.
+	 * This is only applicable for weak-ordered memory model archs such
+	 * as IA-64. The following barrier is also mandatory since FW will
+	 * assumes BDs must have buffers.
+	 */
+	wmb();
+
+	for (i = 0; i < sizeof(rx_prods)/4; i++)
+		REG_WR(bp, fp->ustorm_rx_prods_offset + i*4,
+		       ((u32 *)&rx_prods)[i]);
+
+	mmiowb(); /* keep prod updates ordered */
+
+	DP(NETIF_MSG_RX_STATUS,
+	   "queue[%d]:  wrote  bd_prod %u  cqe_prod %u  sge_prod %u\n",
+	   fp->index, bd_prod, rx_comp_prod, rx_sge_prod);
+}
+
 /* reload helper */
 int bnx2x_reload_if_running(struct net_device *dev);
 
@@ -507,9 +540,6 @@ int bnx2x_change_mac_addr(struct net_device *dev, void *p);
 /* NAPI poll Rx part */
 int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget);
 
-void bnx2x_update_rx_prod(struct bnx2x *bp, struct bnx2x_fastpath *fp,
-			u16 bd_prod, u16 rx_comp_prod, u16 rx_sge_prod);
-
 /* NAPI poll Tx part */
 int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata);
 
@@ -612,38 +642,6 @@ static inline void bnx2x_update_fpsb_idx(struct bnx2x_fastpath *fp)
 	fp->fp_hc_idx = fp->sb_running_index[SM_RX_ID];
 }
 
-static inline void bnx2x_update_rx_prod_gen(struct bnx2x *bp,
-			struct bnx2x_fastpath *fp, u16 bd_prod,
-			u16 rx_comp_prod, u16 rx_sge_prod, u32 start)
-{
-	struct ustorm_eth_rx_producers rx_prods = {0};
-	u32 i;
-
-	/* Update producers */
-	rx_prods.bd_prod = bd_prod;
-	rx_prods.cqe_prod = rx_comp_prod;
-	rx_prods.sge_prod = rx_sge_prod;
-
-	/*
-	 * Make sure that the BD and SGE data is updated before updating the
-	 * producers since FW might read the BD/SGE right after the producer
-	 * is updated.
-	 * This is only applicable for weak-ordered memory model archs such
-	 * as IA-64. The following barrier is also mandatory since FW will
-	 * assumes BDs must have buffers.
-	 */
-	wmb();
-
-	for (i = 0; i < sizeof(rx_prods)/4; i++)
-		REG_WR(bp, start + i*4, ((u32 *)&rx_prods)[i]);
-
-	mmiowb(); /* keep prod updates ordered */
-
-	DP(NETIF_MSG_RX_STATUS,
-	   "queue[%d]:  wrote  bd_prod %u  cqe_prod %u  sge_prod %u\n",
-	   fp->index, bd_prod, rx_comp_prod, rx_sge_prod);
-}
-
 static inline void bnx2x_igu_ack_sb_gen(struct bnx2x *bp, u8 igu_sb_id,
 					u8 segment, u16 index, u8 op,
 					u8 update, u32 igu_addr)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 16f73d3..bf575a9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -1697,15 +1697,6 @@ void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe)
 	return;
 }
 
-void bnx2x_update_rx_prod(struct bnx2x *bp, struct bnx2x_fastpath *fp,
-			u16 bd_prod, u16 rx_comp_prod, u16 rx_sge_prod)
-{
-	u32 start = BAR_USTRORM_INTMEM + fp->ustorm_rx_prods_offset;
-
-	bnx2x_update_rx_prod_gen(bp, fp, bd_prod, rx_comp_prod, rx_sge_prod,
-				 start);
-}
-
 irqreturn_t bnx2x_interrupt(int irq, void *dev_instance)
 {
 	struct bnx2x *bp = netdev_priv(dev_instance);
@@ -4602,8 +4593,8 @@ static void bnx2x_attn_int(struct bnx2x *bp)
 void bnx2x_igu_ack_sb(struct bnx2x *bp, u8 igu_sb_id, u8 segment,
 		      u16 index, u8 op, u8 update)
 {
-	u32 igu_addr = BAR_IGU_INTMEM + (IGU_CMD_INT_ACK_BASE + igu_sb_id)*8;
-
+	u32 igu_addr = bp->igu_base_addr;
+	igu_addr += (IGU_CMD_INT_ACK_BASE + igu_sb_id)*8;
 	bnx2x_igu_ack_sb_gen(bp, igu_sb_id, segment, index, op, update,
 			     igu_addr);
 }
-- 
1.7.9.GIT

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox